In [45]:
%load_ext autoreload
%autoreload 2
import sys 
if '/Users/ericliu/Desktop/Latent-Dirichilet-Allocation' not in sys.path: 
    sys.path.append('/Users/ericliu/Desktop/Latent-Dirichilet-Allocation')
import torch as tr 
import numpy as np 
import pandas as pd 
from collections import defaultdict
from pprint import pprint
from scipy.special import psi, polygamma, gammaln, loggamma
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 
from src.lda_model import LDASmoothed 
from src.generator import doc_generator 
from src.utils import (
    get_vocab_from_docs, 
    get_np_wct, 
    data_loader,
    text_pipeline, 
    process_documents,
) 
from src.text_pre_processor import (
    remove_accented_chars, 
    remove_special_characters, 
    remove_punctuation,
    remove_extra_whitespace_tabs,
    remove_stopwords,
)
from pprint import pprint 
import copy 

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [46]:
gen = doc_generator(
    M = 3,
    L = 20, 
    topic_prior = tr.tensor([1,1,1,1,1], dtype=tr.double)
)

docs = gen.generate_doc()

Document: 0 | word: 0 -> topic: health -> word: contagious
Document: 0 | word: 1 -> topic: law -> word: divorce
Document: 0 | word: 2 -> topic: sport -> word: asymmetrical
Document: 0 | word: 3 -> topic: science -> word: bruise
Document: 0 | word: 4 -> topic: science -> word: infection
Document: 0 | word: 5 -> topic: law -> word: bankrupt
Document: 0 | word: 6 -> topic: art -> word: asymmetrical
Document: 0 | word: 7 -> topic: science -> word: quantum
Document: 0 | word: 8 -> topic: law -> word: contract
Document: 0 | word: 9 -> topic: science -> word: scientst
Document 0: contagious divorce asymmetrical bruise infection bankrupt asymmetrical quantum contract scientst

Document: 1 | word: 0 -> topic: sport -> word: game
Document: 1 | word: 1 -> topic: science -> word: bruise
Document: 1 | word: 2 -> topic: sport -> word: Symmetrical
Document: 1 | word: 3 -> topic: sport -> word: exercise
Document: 1 | word: 4 -> topic: science -> word: scientst
Document: 1 | word: 5 -> topic: sport -> 

In [47]:
result = process_documents(docs, sample=True) 

There are 3 documents in the dataset after processing
On average estimated document length is 10.0 words per document after processing
There are 21 unique vocab in the corpus after processing


In [48]:
import warnings
def init_lda(docs, vocab, n_topic, gibbs=False, random_state=0):
    if gibbs:
        global V, k, N, M, alpha, eta, n_iw, n_di
    else:
        global V, k, N, M, alpha, beta, gamma, phi
        
    np.random.seed(random_state)

    V = len(vocab)
    k = n_topic  # number of topics
    N = np.array([doc.shape[0] for doc in docs])
    M = len(docs)

    print(f"V: {V}\nk: {k}\nN: {N[:10]}...\nM: {M}")

    # initialize α, β
    np.random.random(42)
    if gibbs:
        alpha = np.random.gamma(shape=100, scale=0.01, size=1)  # one for all k
        eta = np.random.gamma(shape=100, scale=0.01, size=1)  # one for all V
        print(f"α: {alpha}\nη: {eta}")
        
        n_iw = np.zeros((k, V), dtype=int)
        n_di = np.zeros((M, k), dtype=int)
        print(f"n_iw: dim {n_iw.shape}\nn_di: dim {n_di.shape}")
    else:
        alpha = np.random.gamma(shape=100, scale=0.01, size=k) #np.random.rand(k)
        beta = np.random.dirichlet(np.ones(V), k)
        print(f"α: dim {alpha.shape}\nβ: dim {beta.shape}")

        # initialize ϕ, γ
        ## ϕ: (M x max(N) x k) arrays with zero paddings on the right
        gamma = alpha + np.ones((M, k)) * V / k

        phi = np.ones((M, max(N), k)) / k
        for m, N_d in enumerate(N):
            phi[m, N_d:, :] = 0  # zero padding for vectorized operations

        print(f"γ: dim {gamma.shape}\nϕ: dim ({len(phi)}, N_d, {phi[0].shape[1]})")

def E_step(docs, phi, gamma, alpha, beta):
    """
    Minorize the joint likelihood function via variational inference.
    This is the E-step of variational EM algorithm for LDA.
    """
    # optimize phi
    for m in range(M):
        #print(N[m], docs[m])
        phi[m, :N[m], :] = (beta[:, docs[m]] * np.exp(
            psi(gamma[m, :]) - psi(gamma[m, :].sum())
        ).reshape(-1, 1)).T

        # Normalize phi
        phi[m, :N[m]] /= phi[m, :N[m]].sum(axis=1).reshape(-1, 1)
        if np.any(np.isnan(phi)):
            raise ValueError("phi nan")
        
        

    # optimize gamma
    gamma = alpha + phi.sum(axis=1)

    

    return phi, gamma


def M_step(docs, phi, gamma, alpha, beta, M):
    """
    maximize the lower bound of the likelihood.
    This is the M-step of variational EM algorithm for (smoothed) LDA.
    
    update of alpha follows from appendix A.2 of Blei et al., 2003.
    """
    # update alpha
    alpha = _update(alpha, gamma, M)
    
    # update beta
    for j in range(V):
        beta[:, j] = np.array([_phi_dot_w(docs, phi, m, j) for m in range(M)]).sum(axis=0)
    beta /= beta.sum(axis=1).reshape(-1, 1)

    return alpha, beta

def _update(var, vi_var, const, max_iter=10000, tol=1e-6):
    """
    From appendix A.2 of Blei et al., 2003.
    For hessian with shape `H = diag(h) + 1z1'`
    
    To update alpha, input var=alpha and vi_var=gamma, const=M.
    To update eta, input var=eta and vi_var=lambda, const=k.
    """
    for _ in range(max_iter):
        # store old value
        var0 = var.copy()
        
        # g: gradient 
        psi_sum = psi(vi_var.sum(axis=1)).reshape(-1, 1)
        g = const * (psi(var.sum()) - psi(var)) \
            + (psi(vi_var) - psi_sum).sum(axis=0)

        # H = diag(h) + 1z1'
        z = const * polygamma(1, var.sum())  # z: Hessian constant component
        h = -const * polygamma(1, var)       # h: Hessian diagonal component
        c = (g / h).sum() / (1./z + (1./h).sum())

        # update var
        var -= (g - c) / h
        print(f"{vi_var.sum()}|{var0} -> {var}")
        
        # check convergence
        err = np.sqrt(np.mean((var - var0) ** 2))
        crit = err < tol
        if crit:
            break
    else:
        warnings.warn(f"max_iter={max_iter} reached: values might not be optimal.")
    
    #print(err)
    return var

def _phi_dot_w(docs, phi, d, j):
    """
    \sum_{n=1}^{N_d} ϕ_{dni} w_{dn}^j
    """
    # doc = np.zeros(docs[m].shape[0] * V, dtype=int)
    # doc[np.arange(0, docs[m].shape[0] * V, V) + docs[m]] = 1
    # doc = doc.reshape(-1, V)
    # lam += phi[m, :N[m], :].T @ doc
    return (docs[d] == j) @ phi[d, :N[d], :]

def dg(gamma, d, i):
    """
    E[log θ_t] where θ_t ~ Dir(gamma)
    """
    return psi(gamma[d, i]) - psi(np.sum(gamma[d, :]))


def dl(lam, i, w_n):
    """
    E[log β_t] where β_t ~ Dir(lam)
    """
    return psi(lam[i, w_n]) - psi(np.sum(lam[i, :]))

def vlb(docs, phi, gamma, alpha, beta, M, N, k):
    """
    Average variational lower bound for joint log likelihood.
    """
    lb = 0
    for d in range(M):
        lb += (
            gammaln(np.sum(alpha))
            - np.sum(gammaln(alpha))
            + np.sum([(alpha[i] - 1) * dg(gamma, d, i) for i in range(k)])
        )

        lb -= (
            gammaln(np.sum(gamma[d, :]))
            - np.sum(gammaln(gamma[d, :]))
            + np.sum([(gamma[d, i] - 1) * dg(gamma, d, i) for i in range(k)])
        )

        for n in range(N[d]):
            w_n = int(docs[d][n])

            lb += np.sum([phi[d][n, i] * dg(gamma, d, i) for i in range(k)])
            lb += np.sum([phi[d][n, i] * np.log(beta[i, w_n]) for i in range(k)])
            lb -= np.sum([phi[d][n, i] * np.log(phi[d][n, i]) for i in range(k)])

    return lb / M

In [49]:
result['vocab_to_idx']

{'contagious': 0,
 'divorce': 1,
 'asymmetrical': 2,
 'bruise': 3,
 'infection': 4,
 'bankrupt': 5,
 'quantum': 6,
 'contract': 7,
 'scientst': 8,
 'game': 9,
 'Symmetrical': 10,
 'exercise': 11,
 'astrophysics': 12,
 'content': 13,
 'football': 14,
 'appetite': 15,
 'copyright': 16,
 'accuse': 17,
 'research': 18,
 'electricity': 19,
 'injection': 20}

In [50]:
docs

{0: 'contagious divorce asymmetrical bruise infection bankrupt asymmetrical quantum contract scientst',
 1: 'game bruise Symmetrical exercise scientst bruise astrophysics content exercise football',
 2: 'appetite copyright bankrupt accuse appetite research electricity appetite injection bruise'}

In [51]:
docs_np = []
for doc in result['documents']: 

    doc_idx = []
    for n in range(len(doc)): 

        doc_idx.append(result['vocab_to_idx'][doc[n]])

    
    docs_np.append(np.array(doc_idx))
docs_np

[array([0, 1, 2, 3, 4, 5, 2, 6, 7, 8]),
 array([ 9,  3, 10, 11,  8,  3, 12, 13, 11, 14]),
 array([15, 16,  5, 17, 15, 18, 19, 15, 20,  3])]

In [52]:
docs = np.array(docs_np)
docs 

array([[ 0,  1,  2,  3,  4,  5,  2,  6,  7,  8],
       [ 9,  3, 10, 11,  8,  3, 12, 13, 11, 14],
       [15, 16,  5, 17, 15, 18, 19, 15, 20,  3]])

In [53]:
init_lda(docs, set(result['vocab_to_idx'].keys()), n_topic=5)

V: 21
k: 5
N: [10 10 10]...
M: 3
α: dim (5,)
β: dim (5, 21)
γ: dim (3, 5)
ϕ: dim (3, N_d, 5)


In [54]:
alpha

array([0.9623354 , 1.01235711, 0.95849651, 0.96679042, 0.8358445 ])

In [55]:
gamma

array([[5.1623354 , 5.21235711, 5.15849651, 5.16679042, 5.0358445 ],
       [5.1623354 , 5.21235711, 5.15849651, 5.16679042, 5.0358445 ],
       [5.1623354 , 5.21235711, 5.15849651, 5.16679042, 5.0358445 ]])

In [56]:
phi

array([[[0.2, 0.2, 0.2, 0.2, 0.2],
        [0.2, 0.2, 0.2, 0.2, 0.2],
        [0.2, 0.2, 0.2, 0.2, 0.2],
        [0.2, 0.2, 0.2, 0.2, 0.2],
        [0.2, 0.2, 0.2, 0.2, 0.2],
        [0.2, 0.2, 0.2, 0.2, 0.2],
        [0.2, 0.2, 0.2, 0.2, 0.2],
        [0.2, 0.2, 0.2, 0.2, 0.2],
        [0.2, 0.2, 0.2, 0.2, 0.2],
        [0.2, 0.2, 0.2, 0.2, 0.2]],

       [[0.2, 0.2, 0.2, 0.2, 0.2],
        [0.2, 0.2, 0.2, 0.2, 0.2],
        [0.2, 0.2, 0.2, 0.2, 0.2],
        [0.2, 0.2, 0.2, 0.2, 0.2],
        [0.2, 0.2, 0.2, 0.2, 0.2],
        [0.2, 0.2, 0.2, 0.2, 0.2],
        [0.2, 0.2, 0.2, 0.2, 0.2],
        [0.2, 0.2, 0.2, 0.2, 0.2],
        [0.2, 0.2, 0.2, 0.2, 0.2],
        [0.2, 0.2, 0.2, 0.2, 0.2]],

       [[0.2, 0.2, 0.2, 0.2, 0.2],
        [0.2, 0.2, 0.2, 0.2, 0.2],
        [0.2, 0.2, 0.2, 0.2, 0.2],
        [0.2, 0.2, 0.2, 0.2, 0.2],
        [0.2, 0.2, 0.2, 0.2, 0.2],
        [0.2, 0.2, 0.2, 0.2, 0.2],
        [0.2, 0.2, 0.2, 0.2, 0.2],
        [0.2, 0.2, 0.2, 0.2, 0.2],
        [0.2, 0.

In [57]:
print(len(result['vocab_to_idx']))

21


In [58]:
phi, gamma = E_step(docs, phi, gamma, alpha, beta)
print(phi)
print()
print()
print(gamma)

[[[0.02346479 0.39878536 0.03673691 0.40578283 0.13523011]
  [0.09748157 0.02263331 0.30599004 0.31658291 0.25731217]
  [0.06189957 0.1591698  0.00595232 0.65660641 0.1163719 ]
  [0.20910445 0.03441702 0.29042605 0.20360155 0.26245093]
  [0.13450658 0.2200715  0.001805   0.52002753 0.12358938]
  [0.23372354 0.0640161  0.351082   0.22637095 0.12480742]
  [0.06189957 0.1591698  0.00595232 0.65660641 0.1163719 ]
  [0.05497438 0.10214811 0.05140853 0.53584767 0.25562131]
  [0.04471861 0.18800969 0.28582745 0.23317971 0.24826454]
  [0.02832183 0.02184718 0.66049108 0.11671266 0.17262725]]

 [[0.28197815 0.42363945 0.0628852  0.21516511 0.01633208]
  [0.20910445 0.03441702 0.29042605 0.20360155 0.26245093]
  [0.04256281 0.32564044 0.20459985 0.00484826 0.42234865]
  [0.08997463 0.1725226  0.30693246 0.12896522 0.30160509]
  [0.02832183 0.02184718 0.66049108 0.11671266 0.17262725]
  [0.20910445 0.03441702 0.29042605 0.20360155 0.26245093]
  [0.06495993 0.14234735 0.09986904 0.1332227  0.55960

In [59]:
np.random.seed(42)

_alpha_ = np.random.gamma(shape=100, scale=0.01, size=5)
_gamma_ = _alpha_ + np.ones((M, k)) * 21 / k

print(_alpha_)
print(_gamma_)

[1.04708219 0.98292693 0.97347267 0.97347428 1.16278368]
[[5.24708219 5.18292693 5.17347267 5.17347428 5.36278368]
 [5.24708219 5.18292693 5.17347267 5.17347428 5.36278368]
 [5.24708219 5.18292693 5.17347267 5.17347428 5.36278368]]


In [60]:
_update(_alpha_, _gamma_, M)

78.4192192526763|[1.04708219 0.98292693 0.97347267 0.97347428 1.16278368] -> [1.79562569 1.71883122 1.70721122 1.70721321 1.92565069]
78.4192192526763|[1.79562569 1.71883122 1.70721122 1.70721321 1.92565069] -> [2.89706016 2.81739023 2.80519713 2.80519922 3.0290319 ]
78.4192192526763|[2.89706016 2.81739023 2.80519713 2.80519922 3.0290319 ] -> [4.15126141 4.07750791 4.06632234 4.06632425 4.27649846]
78.4192192526763|[4.15126141 4.07750791 4.06632234 4.06632425 4.27649846] -> [5.00796502 4.94131996 4.93140917 4.93141086 5.12604242]
78.4192192526763|[5.00796502 4.94131996 4.93140917 4.93141086 5.12604242] -> [5.23567369 5.17138297 5.16190351 5.16190513 5.35149999]
78.4192192526763|[5.23567369 5.17138297 5.16190351 5.16190513 5.35149999] -> [5.24705619 5.18290058 5.17344626 5.17344787 5.36275799]
78.4192192526763|[5.24705619 5.18290058 5.17344626 5.17344787 5.36275799] -> [5.24708219 5.18292693 5.17347267 5.17347428 5.36278368]
78.4192192526763|[5.24708219 5.18292693 5.17347267 5.17347428 

array([5.24708219, 5.18292693, 5.17347267, 5.17347428, 5.36278368])

In [61]:
V = 19 
print(k)
print(V)

5
19


In [62]:
_eta_ = 1
np.random.seed(42)
_lambda_ = np.random.gamma(shape=100, scale=0.01, size=(k, V))

print(_eta_)
print(_lambda_)

1
[[1.04708219 0.98292693 0.97347267 0.97347428 1.16278368 1.07526208
  0.95052839 1.05181933 1.02101821 0.81760009 0.89893338 1.0283693
  1.15026429 0.97429619 0.94330106 1.00778149 0.9378975  0.96782864
  0.9953198 ]
 [0.89475728 1.08105987 0.87968664 0.86986295 1.01644942 1.07222286
  1.01387299 0.98516565 0.96690772 0.95138329 1.10597141 1.02937237
  0.95871485 0.93060329 1.05898791 1.1031785  1.09256024 0.91521059
  0.96611462]
 [0.94959045 0.9782464  1.08000428 1.13828828 1.03320764 0.93363961
  0.99309432 1.16117251 1.00538214 0.96711283 0.97489658 1.03274514
  0.91811143 0.94740917 1.09087585 1.02984852 1.00638968 1.09653088
  0.92820856]
 [0.96431168 1.02652255 1.02295647 0.86193808 0.95525935 0.98065155
  1.03755099 1.02260049 0.98925295 0.81711981 0.99402204 1.00269174
  1.26335968 0.97758574 1.02707522 1.11516717 1.07363499 1.14337395
  0.86316381]
 [0.90101275 0.9411936  0.84973584 1.00352721 0.90766658 1.15954736
  0.92049911 0.96485874 1.0801087  0.87876646 0.84464572 1.

In [63]:
_lambda_[0]

array([1.04708219, 0.98292693, 0.97347267, 0.97347428, 1.16278368,
       1.07526208, 0.95052839, 1.05181933, 1.02101821, 0.81760009,
       0.89893338, 1.0283693 , 1.15026429, 0.97429619, 0.94330106,
       1.00778149, 0.9378975 , 0.96782864, 0.9953198 ])

In [76]:
def _update_eta(var, vi_var, const, max_iter=10000, tol=1e-9):
    """
    From appendix A.2 of Blei et al., 2003.
    For hessian with shape `H = diag(h) + 1z1'`
    
    To update alpha, input var=alpha and vi_var=gamma, const=M.
    To update eta, input var=eta and vi_var=lambda, const=k.
    """
    for _ in range(max_iter):
        # store old value
        var0 = var
        
        # g: gradient 
        psi_sum = psi(vi_var.sum(axis=1)).reshape(-1, 1)
        g = const * (V*psi(V*var) - V*psi(var)) + np.sum(psi(vi_var)) - np.sum(V*(psi_sum))

        h = const * (V**2 * polygamma(1, V*var) - V * polygamma(1, var))

        # # update var
        var -= g/h
        print(f"grad:{g}, hessian:{h}, eta:old{var0} -> {var}")

        if var == np.inf or var == -np.inf: 
            raise ValueError(f"Grad -> {g}, Hessian -> {h}, overflow")
        
        # check convergence
        err = np.sqrt(np.mean((var - var0) ** 2))
        crit = err < tol
        if crit:
            break
    else:
        warnings.warn(f"max_iter={max_iter} reached: values might not be optimal.")
    
    #print(err)
    return var

In [77]:
_update_eta(1, _lambda_, k)

-0.8517960407552891, -58.72490095253988, 1 -> 0.985495147255443
0.013704776840540944, -60.62945883574141, 0.985495147255443 -> 0.9857211888052723
3.433071356084838e-06, -60.599086980475064, 0.9857211888052723 -> 0.9857212454574683
2.2737367544323206e-13, -60.59907937125736, 0.9857212454574683 -> 0.9857212454574721


0.9857212454574721

In [66]:
%%time
N_EPOCH = 1000
TOL = 0.1

verbose = True
lb = -np.inf

for epoch in range(N_EPOCH): 
    # store old value
    lb_old = lb 
    
    # Variational EM
    phi, gamma = E_step(docs, phi, gamma, alpha, beta)
    alpha, beta = M_step(docs, phi, gamma, alpha, beta, M)
    
    # check anomaly
    if np.any(np.isnan(alpha)):
        print("NaN detected: alpha")
        break
    
    # check convergence
    lb = vlb(docs, phi, gamma, alpha, beta, M, N, k)
    err = abs(lb - lb_old)
    
    # check anomaly
    if np.isnan(lb):
        print("NaN detected: lb")
        break
        
    if verbose:
        print(f"{epoch: 04}:  variational_lb: {lb: .3f},  error: {err: .3f}")
    
    if err < TOL:
        break
else:
    warnings.warn(f"max_iter reached: values might not be optimal.")

print(" ========== TRAINING FINISHED ==========")

44.20747180094638|[0.9623354  1.01235711 0.95849651 0.96679042 0.8358445 ] -> [1.30246263 1.3405214  1.47983028 1.40021756 1.205965  ]
44.20747180094638|[1.30246263 1.3405214  1.47983028 1.40021756 1.205965  ] -> [1.54513779 1.57450865 1.90501955 1.72630043 1.48332952]
44.20747180094638|[1.54513779 1.57450865 1.90501955 1.72630043 1.48332952] -> [1.62256447 1.64941434 2.05354967 1.83332462 1.57408585]
44.20747180094638|[1.62256447 1.64941434 2.05354967 1.83332462 1.57408585] -> [1.62825527 1.65494497 2.06526233 1.84131556 1.5808414 ]
44.20747180094638|[1.62825527 1.65494497 2.06526233 1.84131556 1.5808414 ] -> [1.62828373 1.65497279 2.06532493 1.84135584 1.58087532]
44.20747180094638|[1.62828373 1.65497279 2.06532493 1.84135584 1.58087532] -> [1.62828373 1.65497279 2.06532493 1.84135585 1.58087532]
 000:  variational_lb: -30.466,  error:  inf
56.31243784243602|[1.62828373 1.65497279 2.06532493 1.84135585 1.58087532] -> [1.79875897 1.68688547 2.21049485 1.95892711 1.67421148]
56.3124378

In [67]:
alpha 

array([1.81783492, 1.69352158, 2.22584422, 1.97141249, 1.68412409])

In [68]:
beta

array([[7.42779378e-04, 3.43280348e-03, 2.79751112e-03, 1.62539236e-01,
        3.65500141e-03, 1.02784297e-01, 1.41854097e-03, 1.85048907e-03,
        4.80505029e-03, 6.15607135e-02, 5.36133095e-03, 2.39587566e-02,
        8.22072207e-03, 1.31886279e-01, 2.32612299e-03, 2.12126430e-01,
        1.73727492e-02, 1.36573229e-01, 1.06016533e-01, 8.47878226e-03,
        2.09264206e-03],
       [2.92057339e-02, 1.84399698e-03, 1.66429635e-02, 3.10141808e-02,
        1.38354602e-02, 3.51584890e-02, 6.09814422e-03, 1.79996576e-02,
        5.54881825e-03, 9.75809627e-02, 4.32774044e-02, 4.84697427e-02,
        1.90061567e-02, 1.08583629e-02, 2.80497473e-02, 4.14968547e-01,
        8.86249182e-02, 5.38886981e-02, 3.18409848e-02, 4.78853214e-03,
        1.29849833e-03],
       [2.78313666e-03, 2.57882891e-02, 6.43812551e-04, 2.10840299e-01,
        1.17384486e-04, 8.86294806e-02, 3.17472098e-03, 2.83068116e-02,
        2.08087386e-01, 2.11723119e-02, 3.97447087e-02, 1.26042984e-01,
        1.9490

In [70]:
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.datasets import make_multilabel_classification

In [71]:
lda = LatentDirichletAllocation(n_components=5,random_state=0)
lda_trained = lda.fit(docs)

In [72]:
lda.get_params()

{'batch_size': 128,
 'doc_topic_prior': None,
 'evaluate_every': -1,
 'learning_decay': 0.7,
 'learning_method': 'batch',
 'learning_offset': 10.0,
 'max_doc_update_iter': 100,
 'max_iter': 10,
 'mean_change_tol': 0.001,
 'n_components': 5,
 'n_jobs': None,
 'perp_tol': 0.1,
 'random_state': 0,
 'topic_word_prior': None,
 'total_samples': 1000000.0,
 'verbose': 0}

In [73]:
lda.bound_

11.610725168989717

In [74]:
lda.components_.shape

(5, 10)

In [75]:
lda.components_

array([[ 9.19987838,  3.19951541, 10.20086394, 11.20026761,  8.19977108,
         3.1987176 , 12.20051215, 13.20012646, 11.19947855, 14.20052974],
       [15.20005005, 16.20130265,  5.19948615, 17.20038318, 15.20022882,
        18.2007039 , 19.20069516, 15.19958194, 20.20000234,  3.19798761],
       [ 0.20003501,  0.20005971,  0.20005608,  0.20005431,  0.20005405,
         0.20005485,  0.20005521,  0.20005333,  0.20005318,  0.20005451],
       [ 0.20000155,  1.19906252,  2.19953775,  3.1992406 ,  4.199892  ,
         5.20046879,  2.19868227,  6.20018494,  7.20041274,  8.20137363],
       [ 0.20003501,  0.20005971,  0.20005608,  0.20005431,  0.20005405,
         0.20005485,  0.20005521,  0.20005333,  0.20005318,  0.20005451]])