From ecd071383a6d544a948294692d6eadaae6a2dfa0 Mon Sep 17 00:00:00 2001 From: Attractadore Date: Fri, 26 May 2017 09:07:10 +0300 Subject: [PATCH] =?UTF-8?q?[MRG+1]=20Added=20n=5Fcomponents=20parameter=20?= =?UTF-8?q?to=20LatentDirichletAllocation=20to=20replace=20=E2=80=A6=20(#8?= =?UTF-8?q?922)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [MRG+2] Added n_components parameter to LatentDirichletAllocation to replace … --- .../plot_topics_extraction_with_nmf_lda.py | 10 +- sklearn/decomposition/online_lda.py | 60 ++++++----- .../decomposition/tests/test_online_lda.py | 101 ++++++++++-------- 3 files changed, 95 insertions(+), 76 deletions(-) diff --git a/examples/applications/plot_topics_extraction_with_nmf_lda.py b/examples/applications/plot_topics_extraction_with_nmf_lda.py index e1a6f0bdbacd9..04ab2809f36b1 100644 --- a/examples/applications/plot_topics_extraction_with_nmf_lda.py +++ b/examples/applications/plot_topics_extraction_with_nmf_lda.py @@ -14,7 +14,7 @@ functions: the Frobenius norm, and the generalized Kullback-Leibler divergence. The latter is equivalent to Probabilistic Latent Semantic Indexing. -The default parameters (n_samples / n_features / n_topics) should make +The default parameters (n_samples / n_features / n_components) should make the example runnable in a couple of tens of seconds. You can try to increase the dimensions of the problem, but be aware that the time complexity is polynomial in NMF. In LDA, the time complexity is @@ -36,7 +36,7 @@ n_samples = 2000 n_features = 1000 -n_topics = 10 +n_components = 10 n_top_words = 20 @@ -85,7 +85,7 @@ def print_top_words(model, feature_names, n_top_words): "n_samples=%d and n_features=%d..." % (n_samples, n_features)) t0 = time() -nmf = NMF(n_components=n_topics, random_state=1, +nmf = NMF(n_components=n_components, random_state=1, alpha=.1, l1_ratio=.5).fit(tfidf) print("done in %0.3fs." % (time() - t0)) @@ -98,7 +98,7 @@ def print_top_words(model, feature_names, n_top_words): "tf-idf features, n_samples=%d and n_features=%d..." % (n_samples, n_features)) t0 = time() -nmf = NMF(n_components=n_topics, random_state=1, beta_loss='kullback-leibler', +nmf = NMF(n_components=n_components, random_state=1, beta_loss='kullback-leibler', solver='mu', max_iter=1000, alpha=.1, l1_ratio=.5).fit(tfidf) print("done in %0.3fs." % (time() - t0)) @@ -109,7 +109,7 @@ def print_top_words(model, feature_names, n_top_words): print("Fitting LDA models with tf features, " "n_samples=%d and n_features=%d..." % (n_samples, n_features)) -lda = LatentDirichletAllocation(n_topics=n_topics, max_iter=5, +lda = LatentDirichletAllocation(n_components=n_components, max_iter=5, learning_method='online', learning_offset=50., random_state=0) diff --git a/sklearn/decomposition/online_lda.py b/sklearn/decomposition/online_lda.py index 4717bd5af80a3..657ce3ece7e3f 100644 --- a/sklearn/decomposition/online_lda.py +++ b/sklearn/decomposition/online_lda.py @@ -143,17 +143,17 @@ class LatentDirichletAllocation(BaseEstimator, TransformerMixin): Parameters ---------- - n_topics : int, optional (default=10) + n_components : int, optional (default=10) Number of topics. doc_topic_prior : float, optional (default=None) Prior of document topic distribution `theta`. If the value is None, - defaults to `1 / n_topics`. + defaults to `1 / n_components`. In the literature, this is called `alpha`. topic_word_prior : float, optional (default=None) Prior of topic word distribution `beta`. 
If the value is None, defaults - to `1 / n_topics`. + to `1 / n_components`. In the literature, this is called `eta`. learning_method : 'batch' | 'online', default='online' @@ -224,10 +224,15 @@ class LatentDirichletAllocation(BaseEstimator, TransformerMixin): If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by `np.random`. - + + n_topics : int, optional (default=None) + This parameter has been renamed to n_components and will + be removed in version 0.21. + .. deprecated:: 0.19 + Attributes ---------- - components_ : array, [n_topics, n_features] + components_ : array, [n_components, n_features] Variational parameters for topic word distribution. Since the complete conditional for topic word distribution is a Dirichlet, ``components_[i, j]`` can be viewed as pseudocount that represents the @@ -255,13 +260,13 @@ class LatentDirichletAllocation(BaseEstimator, TransformerMixin): """ - def __init__(self, n_topics=10, doc_topic_prior=None, + def __init__(self, n_components=10, doc_topic_prior=None, topic_word_prior=None, learning_method=None, learning_decay=.7, learning_offset=10., max_iter=10, batch_size=128, evaluate_every=-1, total_samples=1e6, perp_tol=1e-1, mean_change_tol=1e-3, max_doc_update_iter=100, - n_jobs=1, verbose=0, random_state=None): - self.n_topics = n_topics + n_jobs=1, verbose=0, random_state=None, n_topics=None): + self.n_components = n_components self.doc_topic_prior = doc_topic_prior self.topic_word_prior = topic_word_prior self.learning_method = learning_method @@ -277,13 +282,20 @@ def __init__(self, n_topics=10, doc_topic_prior=None, self.n_jobs = n_jobs self.verbose = verbose self.random_state = random_state + self.n_topics = n_topics def _check_params(self): """Check model parameters.""" + if self.n_topics is not None: + self._n_components = self.n_topics + warnings.warn("n_topics has been renamed to n_components in version 0.19 " + "and will be removed in 0.21", DeprecationWarning) + else: + self._n_components = self.n_components - if self.n_topics <= 0: - raise ValueError("Invalid 'n_topics' parameter: %r" - % self.n_topics) + if self._n_components <= 0: + raise ValueError("Invalid 'n_components' parameter: %r" + % self._n_components) if self.total_samples <= 0: raise ValueError("Invalid 'total_samples' parameter: %r" @@ -305,12 +317,12 @@ def _init_latent_vars(self, n_features): self.n_iter_ = 0 if self.doc_topic_prior is None: - self.doc_topic_prior_ = 1. / self.n_topics + self.doc_topic_prior_ = 1. / self._n_components else: self.doc_topic_prior_ = self.doc_topic_prior if self.topic_word_prior is None: - self.topic_word_prior_ = 1. / self.n_topics + self.topic_word_prior_ = 1. / self._n_components else: self.topic_word_prior_ = self.topic_word_prior @@ -318,7 +330,7 @@ def _init_latent_vars(self, n_features): init_var = 1. / init_gamma # In the literature, this is called `lambda` self.components_ = self.random_state_.gamma( - init_gamma, init_var, (self.n_topics, n_features)) + init_gamma, init_var, (self._n_components, n_features)) # In the literature, this is `exp(E[log(beta)])` self.exp_dirichlet_component_ = np.exp( @@ -409,7 +421,7 @@ def _em_step(self, X, total_samples, batch_update, parallel=None): Returns ------- - doc_topic_distr : array, shape=(n_samples, n_topics) + doc_topic_distr : array, shape=(n_samples, n_components) Unnormalized document topic distribution. 
""" @@ -569,7 +581,7 @@ def _unnormalized_transform(self, X): Returns ------- - doc_topic_distr : shape=(n_samples, n_topics) + doc_topic_distr : shape=(n_samples, n_components) Document topic distribution for X. """ if not hasattr(self, 'components_'): @@ -603,7 +615,7 @@ def transform(self, X): Returns ------- - doc_topic_distr : shape=(n_samples, n_topics) + doc_topic_distr : shape=(n_samples, n_components) Document topic distribution for X. """ doc_topic_distr = self._unnormalized_transform(X) @@ -622,7 +634,7 @@ def _approx_bound(self, X, doc_topic_distr, sub_sampling): X : array-like or sparse matrix, shape=(n_samples, n_features) Document word matrix. - doc_topic_distr : array, shape=(n_samples, n_topics) + doc_topic_distr : array, shape=(n_samples, n_components) Document topic distribution. In the literature, this is called gamma. @@ -644,7 +656,7 @@ def _loglikelihood(prior, distr, dirichlet_distr, size): return score is_sparse_x = sp.issparse(X) - n_samples, n_topics = doc_topic_distr.shape + n_samples, n_components = doc_topic_distr.shape n_features = self.components_.shape[1] score = 0 @@ -673,7 +685,7 @@ def _loglikelihood(prior, distr, dirichlet_distr, size): # compute E[log p(theta | alpha) - log q(theta | gamma)] score += _loglikelihood(doc_topic_prior, doc_topic_distr, - dirichlet_doc_topic, self.n_topics) + dirichlet_doc_topic, self._n_components) # Compensate for the subsampling of the population of documents if sub_sampling: @@ -717,7 +729,7 @@ def _perplexity_precomp_distr(self, X, doc_topic_distr=None, X : array-like or sparse matrix, [n_samples, n_features] Document word matrix. - doc_topic_distr : None or array, shape=(n_samples, n_topics) + doc_topic_distr : None or array, shape=(n_samples, n_components) Document topic distribution. If it is None, it will be generated by applying transform on X. @@ -736,12 +748,12 @@ def _perplexity_precomp_distr(self, X, doc_topic_distr=None, if doc_topic_distr is None: doc_topic_distr = self._unnormalized_transform(X) else: - n_samples, n_topics = doc_topic_distr.shape + n_samples, n_components = doc_topic_distr.shape if n_samples != X.shape[0]: raise ValueError("Number of samples in X and doc_topic_distr" " do not match.") - if n_topics != self.n_topics: + if n_components != self._n_components: raise ValueError("Number of topics does not match.") current_samples = X.shape[0] @@ -769,7 +781,7 @@ def perplexity(self, X, doc_topic_distr='deprecated', sub_sampling=False): X : array-like or sparse matrix, [n_samples, n_features] Document word matrix. - doc_topic_distr : None or array, shape=(n_samples, n_topics) + doc_topic_distr : None or array, shape=(n_samples, n_components) Document topic distribution. This argument is deprecated and is currently being ignored. diff --git a/sklearn/decomposition/tests/test_online_lda.py b/sklearn/decomposition/tests/test_online_lda.py index c3a221fe4800a..597681dcf8118 100644 --- a/sklearn/decomposition/tests/test_online_lda.py +++ b/sklearn/decomposition/tests/test_online_lda.py @@ -2,6 +2,7 @@ from scipy.linalg import block_diag from scipy.sparse import csr_matrix from scipy.special import psi +import warnings from sklearn.decomposition import LatentDirichletAllocation from sklearn.decomposition._online_lda import (_dirichlet_expectation_1d, @@ -23,22 +24,22 @@ def _build_sparse_mtx(): # Create 3 topics and each topic has 3 distinct words. # (Each word only belongs to a single topic.) 
- n_topics = 3 - block = n_topics * np.ones((3, 3)) - blocks = [block] * n_topics + n_components = 3 + block = n_components * np.ones((3, 3)) + blocks = [block] * n_components X = block_diag(*blocks) X = csr_matrix(X) - return (n_topics, X) + return (n_components, X) def test_lda_default_prior_params(): # default prior parameter should be `1 / topics` # and verbose params should not affect result - n_topics, X = _build_sparse_mtx() - prior = 1. / n_topics - lda_1 = LatentDirichletAllocation(n_topics=n_topics, doc_topic_prior=prior, + n_components, X = _build_sparse_mtx() + prior = 1. / n_components + lda_1 = LatentDirichletAllocation(n_components=n_components, doc_topic_prior=prior, topic_word_prior=prior, random_state=0) - lda_2 = LatentDirichletAllocation(n_topics=n_topics, random_state=0) + lda_2 = LatentDirichletAllocation(n_components=n_components, random_state=0) topic_distr_1 = lda_1.fit_transform(X) topic_distr_2 = lda_2.fit_transform(X) @@ -48,8 +49,8 @@ def test_lda_default_prior_params(): def test_lda_fit_batch(): # Test LDA batch learning_offset (`fit` method with 'batch' learning) rng = np.random.RandomState(0) - n_topics, X = _build_sparse_mtx() - lda = LatentDirichletAllocation(n_topics=n_topics, evaluate_every=1, + n_components, X = _build_sparse_mtx() + lda = LatentDirichletAllocation(n_components=n_components, evaluate_every=1, learning_method='batch', random_state=rng) lda.fit(X) @@ -63,8 +64,8 @@ def test_lda_fit_batch(): def test_lda_fit_online(): # Test LDA online learning (`fit` method with 'online' learning) rng = np.random.RandomState(0) - n_topics, X = _build_sparse_mtx() - lda = LatentDirichletAllocation(n_topics=n_topics, learning_offset=10., + n_components, X = _build_sparse_mtx() + lda = LatentDirichletAllocation(n_components=n_components, learning_offset=10., evaluate_every=1, learning_method='online', random_state=rng) lda.fit(X) @@ -80,8 +81,8 @@ def test_lda_partial_fit(): # Test LDA online learning (`partial_fit` method) # (same as test_lda_batch) rng = np.random.RandomState(0) - n_topics, X = _build_sparse_mtx() - lda = LatentDirichletAllocation(n_topics=n_topics, learning_offset=10., + n_components, X = _build_sparse_mtx() + lda = LatentDirichletAllocation(n_components=n_components, learning_offset=10., total_samples=100, random_state=rng) for i in xrange(3): lda.partial_fit(X) @@ -95,8 +96,8 @@ def test_lda_partial_fit(): def test_lda_dense_input(): # Test LDA with dense input. 
rng = np.random.RandomState(0) - n_topics, X = _build_sparse_mtx() - lda = LatentDirichletAllocation(n_topics=n_topics, learning_method='batch', + n_components, X = _build_sparse_mtx() + lda = LatentDirichletAllocation(n_components=n_components, learning_method='batch', random_state=rng) lda.fit(X.toarray()) @@ -112,8 +113,8 @@ def test_lda_transform(): # Transform result cannot be negative and should be normalized rng = np.random.RandomState(0) X = rng.randint(5, size=(20, 10)) - n_topics = 3 - lda = LatentDirichletAllocation(n_topics=n_topics, random_state=rng) + n_components = 3 + lda = LatentDirichletAllocation(n_components=n_components, random_state=rng) X_trans = lda.fit_transform(X) assert_true((X_trans > 0.0).any()) assert_array_almost_equal(np.sum(X_trans, axis=1), np.ones(X_trans.shape[0])) @@ -125,7 +126,7 @@ def test_lda_fit_transform(): for method in ('online', 'batch'): rng = np.random.RandomState(0) X = rng.randint(10, size=(50, 20)) - lda = LatentDirichletAllocation(n_topics=5, learning_method=method, + lda = LatentDirichletAllocation(n_components=5, learning_method=method, random_state=rng) X_fit = lda.fit_transform(X) X_trans = lda.transform(X) @@ -135,11 +136,11 @@ def test_lda_fit_transform(): def test_lda_partial_fit_dim_mismatch(): # test `n_features` mismatch in `partial_fit` rng = np.random.RandomState(0) - n_topics = rng.randint(3, 6) + n_components = rng.randint(3, 6) n_col = rng.randint(6, 10) X_1 = np.random.randint(4, size=(10, n_col)) X_2 = np.random.randint(4, size=(10, n_col + 1)) - lda = LatentDirichletAllocation(n_topics=n_topics, learning_offset=5., + lda = LatentDirichletAllocation(n_components=n_components, learning_offset=5., total_samples=20, random_state=rng) lda.partial_fit(X_1) assert_raises_regexp(ValueError, r"^The provided data has", @@ -151,7 +152,7 @@ def test_invalid_params(): X = np.ones((5, 10)) invalid_models = ( - ('n_topics', LatentDirichletAllocation(n_topics=0)), + ('n_components', LatentDirichletAllocation(n_components=0)), ('learning_method', LatentDirichletAllocation(learning_method='unknown')), ('total_samples', LatentDirichletAllocation(total_samples=0)), @@ -186,8 +187,8 @@ def test_lda_transform_mismatch(): X = rng.randint(4, size=(20, 10)) X_2 = rng.randint(4, size=(10, 8)) - n_topics = rng.randint(3, 6) - lda = LatentDirichletAllocation(n_topics=n_topics, random_state=rng) + n_components = rng.randint(3, 6) + lda = LatentDirichletAllocation(n_components=n_components, random_state=rng) lda.partial_fit(X) assert_raises_regexp(ValueError, r"^The provided data has", lda.partial_fit, X_2) @@ -195,11 +196,11 @@ def test_lda_transform_mismatch(): @if_safe_multiprocessing_with_blas def test_lda_multi_jobs(): - n_topics, X = _build_sparse_mtx() + n_components, X = _build_sparse_mtx() # Test LDA batch training with multi CPU for method in ('online', 'batch'): rng = np.random.RandomState(0) - lda = LatentDirichletAllocation(n_topics=n_topics, n_jobs=2, + lda = LatentDirichletAllocation(n_components=n_components, n_jobs=2, learning_method=method, evaluate_every=1, random_state=rng) @@ -215,8 +216,8 @@ def test_lda_multi_jobs(): def test_lda_partial_fit_multi_jobs(): # Test LDA online training with multi CPU rng = np.random.RandomState(0) - n_topics, X = _build_sparse_mtx() - lda = LatentDirichletAllocation(n_topics=n_topics, n_jobs=2, + n_components, X = _build_sparse_mtx() + lda = LatentDirichletAllocation(n_components=n_components, n_jobs=2, learning_offset=5., total_samples=30, random_state=rng) for i in range(2): @@ -231,31 +232,31 @@ 
def test_lda_partial_fit_multi_jobs(): def test_lda_preplexity_mismatch(): # test dimension mismatch in `perplexity` method rng = np.random.RandomState(0) - n_topics = rng.randint(3, 6) + n_components = rng.randint(3, 6) n_samples = rng.randint(6, 10) X = np.random.randint(4, size=(n_samples, 10)) - lda = LatentDirichletAllocation(n_topics=n_topics, learning_offset=5., + lda = LatentDirichletAllocation(n_components=n_components, learning_offset=5., total_samples=20, random_state=rng) lda.fit(X) # invalid samples - invalid_n_samples = rng.randint(4, size=(n_samples + 1, n_topics)) + invalid_n_samples = rng.randint(4, size=(n_samples + 1, n_components)) assert_raises_regexp(ValueError, r'Number of samples', lda._perplexity_precomp_distr, X, invalid_n_samples) # invalid topic number - invalid_n_topics = rng.randint(4, size=(n_samples, n_topics + 1)) + invalid_n_components = rng.randint(4, size=(n_samples, n_components + 1)) assert_raises_regexp(ValueError, r'Number of topics', - lda._perplexity_precomp_distr, X, invalid_n_topics) + lda._perplexity_precomp_distr, X, invalid_n_components) def test_lda_perplexity(): # Test LDA perplexity for batch training # perplexity should be lower after each iteration - n_topics, X = _build_sparse_mtx() + n_components, X = _build_sparse_mtx() for method in ('online', 'batch'): - lda_1 = LatentDirichletAllocation(n_topics=n_topics, max_iter=1, + lda_1 = LatentDirichletAllocation(n_components=n_components, max_iter=1, learning_method=method, total_samples=100, random_state=0) - lda_2 = LatentDirichletAllocation(n_topics=n_topics, max_iter=10, + lda_2 = LatentDirichletAllocation(n_components=n_components, max_iter=10, learning_method=method, total_samples=100, random_state=0) lda_1.fit(X) @@ -273,12 +274,12 @@ def test_lda_perplexity(): def test_lda_score(): # Test LDA score for batch training # score should be higher after each iteration - n_topics, X = _build_sparse_mtx() + n_components, X = _build_sparse_mtx() for method in ('online', 'batch'): - lda_1 = LatentDirichletAllocation(n_topics=n_topics, max_iter=1, + lda_1 = LatentDirichletAllocation(n_components=n_components, max_iter=1, learning_method=method, total_samples=100, random_state=0) - lda_2 = LatentDirichletAllocation(n_topics=n_topics, max_iter=10, + lda_2 = LatentDirichletAllocation(n_components=n_components, max_iter=10, learning_method=method, total_samples=100, random_state=0) lda_1.fit_transform(X) @@ -292,8 +293,8 @@ def test_lda_score(): def test_perplexity_input_format(): # Test LDA perplexity for sparse and dense input # score should be the same for both dense and sparse input - n_topics, X = _build_sparse_mtx() - lda = LatentDirichletAllocation(n_topics=n_topics, max_iter=1, + n_components, X = _build_sparse_mtx() + lda = LatentDirichletAllocation(n_components=n_components, max_iter=1, learning_method='batch', total_samples=100, random_state=0) lda.fit(X) @@ -304,8 +305,8 @@ def test_perplexity_input_format(): def test_lda_score_perplexity(): # Test the relationship between LDA score and perplexity - n_topics, X = _build_sparse_mtx() - lda = LatentDirichletAllocation(n_topics=n_topics, max_iter=10, + n_components, X = _build_sparse_mtx() + lda = LatentDirichletAllocation(n_components=n_components, max_iter=10, random_state=0) lda.fit(X) perplexity_1 = lda.perplexity(X, sub_sampling=False) @@ -318,8 +319,8 @@ def test_lda_score_perplexity(): def test_lda_fit_perplexity(): # Test that the perplexity computed during fit is consistent with what is # returned by the perplexity method - 
n_topics, X = _build_sparse_mtx() - lda = LatentDirichletAllocation(n_topics=n_topics, max_iter=1, + n_components, X = _build_sparse_mtx() + lda = LatentDirichletAllocation(n_components=n_components, max_iter=1, learning_method='batch', random_state=0, evaluate_every=1) lda.fit(X) @@ -336,8 +337,8 @@ def test_lda_fit_perplexity(): def test_doc_topic_distr_deprecation(): # Test that the appropriate warning message is displayed when a user # attempts to pass the doc_topic_distr argument to the perplexity method - n_topics, X = _build_sparse_mtx() - lda = LatentDirichletAllocation(n_topics=n_topics, max_iter=1, + n_components, X = _build_sparse_mtx() + lda = LatentDirichletAllocation(n_components=n_components, max_iter=1, learning_method='batch', total_samples=100, random_state=0) distr1 = lda.fit_transform(X) @@ -367,3 +368,9 @@ def test_dirichlet_expectation(): assert_allclose(_dirichlet_expectation_2d(x), psi(x) - psi(np.sum(x, axis=1)[:, np.newaxis]), rtol=1e-11, atol=3e-9) + + +def test_lda_n_topics_deprecation(): + n_components, X = _build_sparse_mtx() + lda = LatentDirichletAllocation(n_topics=10, learning_method='batch') + assert_warns(DeprecationWarning, lda.fit, X) \ No newline at end of file
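

For readers applying this patch, here is a minimal usage sketch (not part of the diff above) of the intended behaviour after the rename: `n_components` is the supported keyword, while the legacy `n_topics` keyword still works but causes `fit` to emit a `DeprecationWarning` (removal scheduled for 0.21). The sketch assumes a scikit-learn tree with this patch applied; the toy document-term matrix mirrors the block-diagonal fixture used in the tests, and the exact topic assignments depend on the random state.

import warnings

import numpy as np
from scipy.linalg import block_diag
from scipy.sparse import csr_matrix
from sklearn.decomposition import LatentDirichletAllocation

# Same 3-topic block-diagonal corpus as the test fixture:
# each of the 3 topics owns 3 distinct words.
n_components = 3
X = csr_matrix(block_diag(*[n_components * np.ones((3, 3))] * n_components))

# Preferred spelling after this patch.
lda = LatentDirichletAllocation(n_components=n_components,
                                learning_method='batch',
                                random_state=0)
doc_topic = lda.fit_transform(X)
print(doc_topic.shape)  # (9, 3): one row per document, one column per topic

# Legacy spelling still works, but fit() warns that n_topics was renamed
# to n_components and will be removed in 0.21.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    legacy = LatentDirichletAllocation(n_topics=n_components,
                                       learning_method='batch',
                                       random_state=0)
    legacy.fit(X)
print(any(issubclass(w.category, DeprecationWarning) for w in caught))  # True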