Merge pull request scikit-learn#1 from MerwaneHAMADI/8657
Issue 8657
waynehamadi committed Mar 28, 2017
2 parents bc63f49 + 0cbca0c commit eb1184b
Showing 7 changed files with 25 additions and 25 deletions.
2 changes: 1 addition & 1 deletion doc/modules/classes.rst
@@ -881,7 +881,7 @@ details.

metrics.adjusted_mutual_info_score
metrics.adjusted_rand_score
-metrics.calinski_harabaz_score
+metrics.calinski_harabasz_score
metrics.completeness_score
metrics.fowlkes_mallows_score
metrics.homogeneity_completeness_v_measure
18 changes: 9 additions & 9 deletions doc/modules/clustering.rst
@@ -1500,17 +1500,17 @@ Drawbacks
* :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_silhouette_analysis.py` : In this example
the silhouette analysis is used to choose an optimal value for n_clusters.

-.. _calinski_harabaz_index:
+.. _calinski_harabasz_index:

-Calinski-Harabaz Index
+Calinski-Harabasz Index
----------------------

-If the ground truth labels are not known, the Calinski-Harabaz index
-(:func:`sklearn.metrics.calinski_harabaz_score`) can be used to evaluate the
-model, where a higher Calinski-Harabaz score relates to a model with better
+If the ground truth labels are not known, the Calinski-Harabasz index
+(:func:`sklearn.metrics.calinski_harabasz_score`) can be used to evaluate the
+model, where a higher Calinski-Harabasz score relates to a model with better
defined clusters.

-For :math:`k` clusters, the Calinski-Harabaz score :math:`s` is given as the
+For :math:`k` clusters, the Calinski-Harabasz score :math:`s` is given as the
ratio of the between-clusters dispersion mean and the within-cluster
dispersion:

@@ -1537,14 +1537,14 @@ points in cluster :math:`q`.
>>> X = dataset.data
>>> y = dataset.target

-In normal usage, the Calinski-Harabaz index is applied to the results of a
+In normal usage, the Calinski-Harabasz index is applied to the results of a
cluster analysis.

>>> import numpy as np
>>> from sklearn.cluster import KMeans
>>> kmeans_model = KMeans(n_clusters=3, random_state=1).fit(X)
>>> labels = kmeans_model.labels_
->>> metrics.calinski_harabaz_score(X, labels)  # doctest: +ELLIPSIS
+>>> metrics.calinski_harabasz_score(X, labels)  # doctest: +ELLIPSIS
560.39...


@@ -1560,7 +1560,7 @@ Advantages
Drawbacks
~~~~~~~~~

-- The Calinski-Harabaz index is generally higher for convex clusters than other
+- The Calinski-Harabasz index is generally higher for convex clusters than other
concepts of clusters, such as density based clusters like those obtained
through DBSCAN.

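To make the ratio described in the clustering.rst hunk above concrete, here is a minimal NumPy sketch (not part of this commit; the helper name ch_score is ours, and it assumes a scikit-learn build that already includes this rename). It computes the between- and within-cluster dispersions directly and checks the result against metrics.calinski_harabasz_score on the k-means labels from the doctest:

import numpy as np
from sklearn import datasets, metrics
from sklearn.cluster import KMeans

def ch_score(X, labels):
    # Hypothetical helper: Calinski-Harabasz score computed from its definition,
    # s = [B / (k - 1)] / [W / (n - k)], where B is the between-cluster
    # dispersion and W the within-cluster dispersion.
    X = np.asarray(X, dtype=float)
    labels = np.asarray(labels)
    n, k = X.shape[0], len(np.unique(labels))
    overall_mean = X.mean(axis=0)
    between, within = 0.0, 0.0
    for q in np.unique(labels):
        cluster_q = X[labels == q]
        center_q = cluster_q.mean(axis=0)
        between += len(cluster_q) * np.sum((center_q - overall_mean) ** 2)
        within += np.sum((cluster_q - center_q) ** 2)
    return between * (n - k) / (within * (k - 1))

X = datasets.load_iris().data
labels = KMeans(n_clusters=3, random_state=1).fit(X).labels_
print(ch_score(X, labels))                           # ~560, as in the doctest above
print(metrics.calinski_harabasz_score(X, labels))    # same value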
2 changes: 1 addition & 1 deletion doc/whats_new.rst
@@ -608,7 +608,7 @@ Model selection and evaluation
Index which measures the similarity of two clusterings of a set of points
By :user:`Arnaud Fouchet <afouchet>` and :user:`Thierry Guillemot <tguillemot>`.

-- Added :func:`metrics.calinski_harabaz_score`, which computes the Calinski
+- Added :func:`metrics.calinski_harabasz_score`, which computes the Calinski
and Harabaz score to evaluate the resulting clustering of a set of points.
By :user:`Arnaud Fouchet <afouchet>` and :user:`Thierry Guillemot <tguillemot>`.

2 changes: 1 addition & 1 deletion sklearn/metrics/__init__.py
@@ -42,7 +42,7 @@
from .cluster import fowlkes_mallows_score
from .cluster import silhouette_samples
from .cluster import silhouette_score
-from .cluster import calinski_harabaz_score
+from .cluster import calinski_harabasz_score
from .cluster import v_measure_score

from .pairwise import euclidean_distances
4 changes: 2 additions & 2 deletions sklearn/metrics/cluster/__init__.py
@@ -19,12 +19,12 @@
from .supervised import entropy
from .unsupervised import silhouette_samples
from .unsupervised import silhouette_score
-from .unsupervised import calinski_harabaz_score
+from .unsupervised import calinski_harabasz_score
from .bicluster import consensus_score

__all__ = ["adjusted_mutual_info_score", "normalized_mutual_info_score",
"adjusted_rand_score", "completeness_score", "contingency_matrix",
"expected_mutual_information", "homogeneity_completeness_v_measure",
"homogeneity_score", "mutual_info_score", "v_measure_score",
"fowlkes_mallows_score", "entropy", "silhouette_samples",
"silhouette_score", "calinski_harabaz_score", "consensus_score"]
"silhouette_score", "calinski_harabasz_score", "consensus_score"]
14 changes: 7 additions & 7 deletions sklearn/metrics/cluster/tests/test_unsupervised.py
@@ -13,7 +13,7 @@
from sklearn.metrics.cluster import silhouette_score
from sklearn.metrics.cluster import silhouette_samples
from sklearn.metrics import pairwise_distances
-from sklearn.metrics.cluster import calinski_harabaz_score
+from sklearn.metrics.cluster import calinski_harabasz_score


def test_silhouette():
@@ -119,30 +119,30 @@ def test_non_numpy_labels():
silhouette_score(list(X), list(y)), silhouette_score(X, y))


-def test_calinski_harabaz_score():
+def test_calinski_harabasz_score():
rng = np.random.RandomState(seed=0)

# Assert message when there is only one label
assert_raise_message(ValueError, "Number of labels is",
-calinski_harabaz_score,
+calinski_harabasz_score,
rng.rand(10, 2), np.zeros(10))

# Assert message when all point are in different clusters
assert_raise_message(ValueError, "Number of labels is",
-calinski_harabaz_score,
+calinski_harabasz_score,
rng.rand(10, 2), np.arange(10))

# Assert the value is 1. when all samples are equals
-assert_equal(1., calinski_harabaz_score(np.ones((10, 2)),
+assert_equal(1., calinski_harabasz_score(np.ones((10, 2)),
[0] * 5 + [1] * 5))

# Assert the value is 0. when all the mean cluster are equal
-assert_equal(0., calinski_harabaz_score([[-1, -1], [1, 1]] * 10,
+assert_equal(0., calinski_harabasz_score([[-1, -1], [1, 1]] * 10,
[0] * 10 + [1] * 10))

# General case (with non numpy arrays)
X = ([[0, 0], [1, 1]] * 5 + [[3, 3], [4, 4]] * 5 +
[[0, 4], [1, 3]] * 5 + [[3, 1], [4, 0]] * 5)
labels = [0] * 10 + [1] * 10 + [2] * 10 + [3] * 10
-assert_almost_equal(calinski_harabaz_score(X, labels),
+assert_almost_equal(calinski_harabasz_score(X, labels),
45 * (40 - 4) / (5 * (4 - 1)))
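As a sanity check on the expected value in the general-case test above (our standalone example, not part of the commit's test suite; it assumes a scikit-learn build with the renamed function): the synthetic data has four clusters of ten points each (n = 40, k = 4), with a between-cluster dispersion of 45 per cluster and a within-cluster dispersion of 5 per cluster, so 45 * (40 - 4) / (5 * (4 - 1)) works out to 108.

import numpy as np
from sklearn.metrics import calinski_harabasz_score

# Same synthetic data as the general-case test: four clusters of ten points each.
X = ([[0, 0], [1, 1]] * 5 + [[3, 3], [4, 4]] * 5 +
     [[0, 4], [1, 3]] * 5 + [[3, 1], [4, 0]] * 5)
labels = [0] * 10 + [1] * 10 + [2] * 10 + [3] * 10

# Expected score: 45 * (40 - 4) / (5 * (4 - 1)) == 108.0
assert np.isclose(calinski_harabasz_score(X, labels), 108.0)
print(calinski_harabasz_score(X, labels))  # 108.0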
8 changes: 4 additions & 4 deletions sklearn/metrics/cluster/unsupervised.py
@@ -207,13 +207,13 @@ def silhouette_samples(X, labels, metric='euclidean', **kwds):
return sil_samples


-def calinski_harabaz_score(X, labels):
-"""Compute the Calinski and Harabaz score.
+def calinski_harabasz_score(X, labels):
+"""Compute the Calinski and Harabasz score.
The score is defined as ratio between the within-cluster dispersion and
the between-cluster dispersion.
-Read more in the :ref:`User Guide <calinski_harabaz_index>`.
+Read more in the :ref:`User Guide <calinski_harabasz_index>`.
Parameters
----------
@@ -227,7 +227,7 @@ def calinski_harabaz_score(X, labels):
Returns
-------
score : float
-The resulting Calinski-Harabaz score.
+The resulting Calinski-Harabasz score.
References
----------
