# Graphical Models for Textual Data
This notebook shows how graphical models can be used to infer relationships among text documents.

In [35]:
import pandas as pd
from regain.utils import flatten
import numpy as np

from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.decomposition import NMF, LatentDirichletAllocation

In [1]:
# Load the stemmed WebKB training split: tab-separated, no header row, with
# the first column used as the row index.
filename = "../regain/data/text/webkb-train-stemmed.txt"
train = pd.read_csv(filename, sep='\t', header=None, index_col=0)
# Drop rows with missing values before naming the single remaining column.
train = train.dropna()
train.columns = ['words']
# After dropna() there is nothing left to fill; fillna is kept as a guard.
documents = train['words'].fillna("")

In [2]:
# Keep only the rows whose text is a real string, remembering each row label.
text_rows = [(row.Index, row.words) for row in train.itertuples()
             if isinstance(row.words, str)]

# Sorted array of every distinct space-separated token in the corpus.
words = np.unique(flatten([text.split(' ') for label, text in text_rows]))

# One {token: count} dictionary per document.
ld = []
for label, text in text_rows:
    tokens, counts = np.unique(text.split(' '), return_counts=True)
    ld.append(dict(zip(tokens, counts)))

# Document-term count matrix; tokens absent from a document get a count of 0.
X = pd.DataFrame(ld, index=[label for label, text in text_rows]).fillna(0)

# Row labels of X (the index column of the input file).
y = X.index

In [5]:
# LDA is fitted on raw term counts (not tf-idf weights) because it is a
# probabilistic generative model of word counts.
tf_vectorizer = CountVectorizer(max_df=0.95, min_df=2, stop_words='english')
tf = tf_vectorizer.fit_transform(documents)

# scikit-learn 1.0 introduced get_feature_names_out() and 1.2 removed
# get_feature_names(); use whichever the installed version provides.
if hasattr(tf_vectorizer, "get_feature_names_out"):
    tf_feature_names = list(tf_vectorizer.get_feature_names_out())
else:
    tf_feature_names = tf_vectorizer.get_feature_names()

# toarray() returns a plain ndarray instead of the np.matrix that todense() gives.
df_tf = pd.DataFrame(tf.toarray(), index=train.index, columns=tf_feature_names)

In [55]:
def display_topics(H, W, feature_names, documents, no_top_words, no_top_documents):
    """Print the top words and top documents of each topic; return the word labels.

    Parameters
    ----------
    H : 2-D array
        One row per topic; column ``i`` is the weight of ``feature_names[i]``.
    W : 2-D array
        One column per topic; row ``j`` is the weight of the ``j``-th document.
    feature_names : sequence of str
        Vocabulary, indexed by the column positions of ``H``.
    documents : sequence or pandas Series
        Document texts, in the same row order as ``W``.
    no_top_words : int
        Number of highest-weighted words used to label each topic.
    no_top_documents : int
        Number of highest-weighted documents printed for each topic.

    Returns
    -------
    list of str
        One label per topic: its top words joined by single spaces, in
        decreasing order of weight.
    """
    # Row numbers of W are positions, not labels. Indexing a pandas Series with
    # [] performs a label lookup, which returns the wrong document (or raises)
    # when the index is not 0..n-1, so go through .iloc when it is available.
    doc_at = documents.iloc if hasattr(documents, "iloc") else documents

    topics = []
    for topic_idx, topic in enumerate(H):
        # argsort is ascending; the reversed tail slice takes the largest
        # no_top_words weights in decreasing order.
        top_word_indices = topic.argsort()[:-no_top_words - 1:-1]
        topics.append(" ".join([feature_names[i] for i in top_word_indices]))

        print("Topic %d: %s" % (topic_idx, topics[-1]))
        top_doc_indices = np.argsort(W[:, topic_idx])[::-1][0:no_top_documents]
        for doc_index in top_doc_indices:
            print("doc %d: %s" % (doc_index, doc_at[doc_index]))
    return topics
# # NMF is able to use tf-idf
# tfidf_vectorizer = TfidfVectorizer(max_df=0.95, min_df=2, stop_words='english')
# tfidf = tfidf_vectorizer.fit_transform(documents)
# tfidf_feature_names = tfidf_vectorizer.get_feature_names()

# # Run NMF
# nmf_model = NMF(n_components=no_topics, random_state=1, alpha=.1, l1_ratio=.5, init='nndsvd').fit(tfidf)
# nmf_W = nmf_model.transform(tfidf)
# nmf_H = nmf_model.components_

# print("NMF Topics")
# display_topics(nmf_H, nmf_W, tfidf_feature_names, documents, no_top_words, no_top_documents)

In [49]:
# Number of LDA components (passed as n_components when the model is built).
n_topics = 50
# How many top words / top documents display_topics reports for each topic.
n_top_words = 3
n_top_documents = 3

In [47]:
# Run LDA
# Configure the estimator, then fit it on the term-count matrix `tf`.
lda_model = LatentDirichletAllocation(
    n_components=n_topics,
    learning_method='online',
    learning_offset=50.,
    max_iter=5,
    random_state=0,
)
lda_model.fit(tf)
# lda_W: transform of the training counts (one row per document);
# lda_H: the fitted components_ (one row per topic).
lda_W = lda_model.transform(tf)
lda_H = lda_model.components_

In [56]:
print("LDA Topics")
# scikit-learn 1.0 introduced get_feature_names_out() and 1.2 removed
# get_feature_names(); use whichever the installed version provides.
if hasattr(tf_vectorizer, "get_feature_names_out"):
    lda_feature_names = list(tf_vectorizer.get_feature_names_out())
else:
    lda_feature_names = tf_vectorizer.get_feature_names()
topics = display_topics(lda_H, lda_W, lda_feature_names, documents, n_top_words, n_top_documents)

# Document-topic weights labelled by each topic's top-word string.
df = pd.DataFrame(lda_W, index=train.index, columns=topics)
# NOTE: X and y are rebound here to the LDA representation and the document
# index; cells that expect the word-count X must be run before this one.
X = lda_W
y = documents.index

LDA Topics
Topic 0: databas washington queri
doc 971: seq project queri sequenc data document construct time put order databas order time put databas time put databas order document content project object current statu motiv seq data model sequin queri languag optim techniqu seq system develop public relat work contact inform project object number import databas applic requir process larg amount order sequenc data domain applic includ financi manag histor analysi econom social scienc medic scienc biolog scienc exist relat databas inadequ regard data collect treat set sequenc express sequenc queri tediou evalu ineffici databas model data abstract sequenc data sequenc queri declar manner util order semant advantag uniqu opportun queri optim evalu integr sequenc data relat data user store queri combin relat sequenc requir serv goal seq project kind sequenc support tempor sequenc import kind queri express notion previou natur sequenc queri optim evalu effici issu studi theori databas syste

In [11]:
# X = pd.DataFrame([], columns=words)
# y = []
# for row in train.itertuples():
#     if isinstance(row.words, str):
#         series = pd.Series(dict(zip(*np.unique(row.words.split(' '), return_counts=True))), name=row.Index)
#         X = X.append(series)
#         y.append(row.Index)

# X = X.fillna(0)
# X.index = y

# y = np.asarray(y)

# Discard every word that never occurs in at least one class: for each
# distinct label, collect the columns whose total count over that label's rows
# is zero. A word missing from several classes is appended once per class.
words_to_discard = []
for yy in np.unique(y):
    # The mask is ordered like X.columns, so it must be applied to X.columns.
    # Applying it to the sorted `words` array selects the wrong words whenever
    # the column order of X differs from sorted order.
    absent_in_class = (X[y == yy].sum(axis=0) == 0).values
    words_to_discard += list(X.columns[absent_in_class])

# Keep the remaining words in their original column order, so the result is
# deterministic rather than dependent on set iteration order.
discarded = set(words_to_discard)
words_to_keep = [word for word in X.columns if word not in discarded]

def logentropy_normalize(X):
    """Weight a count table with a per-column entropy factor and a log transform.

    Each column of ``X`` is first scaled to sum to one (``P``). The column
    factor is ``1 + sum(P * log(P)) / log(n_rows + 1)``, where terms with
    ``P == 0`` contribute zero. The result is that factor multiplied by
    ``log(1 + X)``.

    ``X`` must be a pandas DataFrame (``.values`` and ``.fillna`` are used);
    presumably rows are documents and columns are terms -- confirm with caller.
    Returns a DataFrame with the same labels as ``X``.
    """
    column_totals = X.values.sum(axis=0, keepdims=True)
    P = X / column_totals
    # 0 * log(0) evaluates to NaN; fillna(0) turns those terms into zero.
    p_log_p = (P * np.log(P)).fillna(0)
    column_entropy_term = p_log_p.values.sum(axis=0, keepdims=True)
    n_rows_plus_one = X.shape[0] + 1
    E = 1 + column_entropy_term / np.log(n_rows_plus_one)
    local_weight = np.log(1 + X)
    return E * local_weight

# Apply the weighting to the full table, then keep only the retained words.
X_new = logentropy_normalize(X)[words_to_keep]



In [12]:
from gensim.models import LogEntropyModel
from gensim.test.utils import common_texts
from gensim.corpora import Dictionary

# Tokenise every document on single spaces, skipping non-string entries.
corp = [w.split(' ') for w in train.words.tolist() if isinstance(w, str)]
text = corp #common_texts
dct = Dictionary(text)  # fit dictionary

num_terms = 50 # or words.size
# Shrink the vocabulary to at most num_terms tokens (the other
# filter_extremes thresholds are left at the library defaults).
dct.filter_extremes(keep_n=num_terms)

# Only the first 10 documents are used, so slice before converting instead of
# building the bag-of-words for the whole corpus and discarding most of it.
corpus = [dct.doc2bow(row) for row in text[:10]]  #convert to BoW format
model = LogEntropyModel(corpus, normalize=True)  # fit model

In [14]:
import ctmmodel
# Build a 15-topic CtmModel over the bag-of-words corpus, using the gensim
# dictionary as the id -> word mapping.
# NOTE(review): the recorded output suggests training runs inside the constructor -- confirm.
ctm_model = ctmmodel.CtmModel(corpus, id2word=dct, num_topics=15)

0


  bound += np.sum(np.log(nu2)) + self.num_topics  # TODO safe_log
  res += 0.5 / nu2


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49


In [15]:
# Turn each bag-of-words document back into a space-separated string in which
# every token is repeated as many times as its count.
all_words = []
for bow in corpus:
    repeated_tokens = []
    for token_id, count in bow:
        repeated_tokens += [dct[token_id]] * count
    all_words.append(' '.join(repeated_tokens))

In [26]:
import pyctm
from pyctm import variational_bayes, inferencer, utils
# parameter set 3
# Hyperparameters forwarded unchanged to the pyctm inferencer below.
alpha_mu=0.
alpha_sigma=1
alpha_beta=0

ctm_inferencer = variational_bayes.VariationalBayes();
# Initialise with the reconstructed document strings, the dictionary's tokens
# as vocabulary, and 15 topics.
# NOTE(review): _initialize is a private method of the inferencer -- confirm
# it is the intended entry point for this pyctm version.
ctm_inferencer._initialize(all_words, list(dct.values()), number_of_topics=15,
                           alpha_mu=alpha_mu, alpha_sigma=alpha_sigma, alpha_beta=alpha_beta);

# Run 50 learning steps.
# NOTE(review): the recorded output reports a log likelihood of nan from the
# first iteration on; possibly related to alpha_beta=0 -- verify against pyctm.
for iteration in range(50):
    ctm_inferencer.learning(-1)

successfully parse 10 documents...
creating 8 processes
[[-64.209422]] nan
e_step and m_step of iteration 1 finished in 0.122496 and 0.000340939 seconds respectively with log likelihood nan
update hyper-parameter mu to [ 0.00236528 -0.02848685  0.05582271 -0.01741234  0.0204918  -0.0208173
 -0.04207808  0.04002519 -0.02070904  0.02731378  0.02718902  0.05141206
 -0.08558489  0.02507594 -0.04204119]
update hyper-parameter sigma to
[[ 4.72478977e-01 -3.39244515e-03 -2.62055756e-03 -1.44179434e-03
  -1.95299415e-03 -4.47710247e-04 -3.65385988e-03  2.11118227e-03
  -1.66125279e-03 -4.17026441e-03  1.02400313e-02  5.18919079e-03
  -6.07055816e-03 -6.90568613e-06 -1.57402068e-04]
 [-3.39244515e-03  4.81943480e-01 -1.58062357e-03 -8.30949061e-03
  -2.18674288e-03  4.04759563e-03  3.04638493e-03  1.66296854e-03
   5.57377647e-04 -7.01017261e-04 -6.64291342e-04 -5.91226488e-03
   1.45914450e-03  1.34757771e-05  1.37211109e-03]
 [-2.62055756e-03 -1.58062357e-03  4.59117474e-01  2.89237628e-03
  

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 4 finished in 0.198572 and 0.000667095 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.16727981  0.32594371  0.05886509  0.49638625  0.01029289 -0.3300881
 -0.22656625 -0.08355682 -0.32836028 -0.18850041  0.72680491  0.14737153
 -0.42744742 -0.02372016 -0.30797697]
update hyper-parameter sigma to
[[ 3.04374596e-01 -2.63160302e-01 -2.27319180e-02  2.56799936e-04
  -1.21067384e-02 -3.86785123e-02 -4.13834620e-02 -8.17099385e-03
   2.61265530e-03 -3.36465052e-02  2.30791651e-01  7.07854877e-02
  -5.81173247e-03 -2.19276593e-02 -5.25972809e-03]
 [-2.63160302e-01  2.06402777e+00  9.39058622e-02 -4.74048113e-01
  -1.26360487e-01  1.69569367e-01  1.08186394e-01  8.12151773e-02
  -9.45061680e-04  3.70383844e-02 -8.81337417e-01 -2.85740607e-01
  -5.34624233e-02  2.17862866e-01  3.82619455e-02]
 [-2.27319180e-02  9.39058622e-02  2.20353026e-01  5.53731706e-02
   3.28856927e-02  2.14289952e-02  3.01577808e-02  1.48697184e-02

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 5 finished in 0.227302 and 0.000361919 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.1639434   0.57772119 -0.00592061  0.73896908  0.04172367 -0.3959841
 -0.223769   -0.11346379 -0.38556161 -0.20787965  0.59208357  0.09163749
 -0.4900268  -0.00500942 -0.34732992]
update hyper-parameter sigma to
[[ 3.90540065e-01 -4.79736260e-01 -3.93749278e-02  3.40554811e-02
  -1.87440005e-02 -6.95112156e-02 -7.69832871e-02 -1.94325138e-02
   1.89947406e-02 -6.18366722e-02  3.58904182e-01  8.63382884e-02
  -9.91995748e-03 -2.81262970e-02 -2.34910142e-02]
 [-4.79736260e-01  2.76395398e+00  1.49496144e-01 -4.43864100e-01
  -4.68919545e-02  2.56571887e-01  1.88046961e-01  1.19217477e-01
  -5.99303768e-02  1.23704956e-01 -1.39682298e+00 -3.03353305e-01
  -1.05878231e-01  2.48432962e-01  1.02343894e-01]
 [-3.93749278e-02  1.49496144e-01  1.96305938e-01  3.31636072e-02
   2.80638695e-02  2.70504670e-02  3.56832772e-02  1.73689210e-02

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 6 finished in 0.233481 and 0.000446796 seconds respectively with log likelihood nan
update hyper-parameter mu to [-1.35243640e-01  7.20874720e-01 -5.18858005e-02  8.92029218e-01
  6.22771274e-02 -4.45905191e-01 -2.15341455e-01 -1.09139815e-01
 -4.00942450e-01 -2.10986369e-01  4.32482298e-01  5.71441191e-02
 -5.30186380e-01  2.88333599e-04 -3.65243470e-01]
update hyper-parameter sigma to
[[ 4.93775327e-01 -6.57859121e-01 -5.27097431e-02  5.29023962e-02
  -4.63524161e-02 -9.68790415e-02 -1.07420639e-01 -2.78534927e-02
   5.31419598e-02 -8.86604936e-02  4.61175795e-01  8.26452726e-02
  -1.10459198e-02 -2.50052638e-02 -4.63204333e-02]
 [-6.57859121e-01  3.08063031e+00  1.81537669e-01 -2.45621108e-01
   1.12505674e-01  3.15690689e-01  2.34715527e-01  1.31766164e-01
  -1.62176132e-01  2.19321957e-01 -1.74075693e+00 -2.74148204e-01
  -1.68942893e-01  2.14389960e-01  1.61408092e-01]
 [-5.27097431e-02  1.81537669e-01  1.80004666e-01  9.46636847e-03
   

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 7 finished in 0.219565 and 0.000467062 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.09998576  0.75726436 -0.07407689  0.95576999  0.06791844 -0.4924275
 -0.21454536 -0.09016088 -0.38993743 -0.22647514  0.33368494  0.04232832
 -0.54939755 -0.00838979 -0.38128739]
update hyper-parameter sigma to
[[ 5.78372012e-01 -7.71254851e-01 -5.88976288e-02  7.54526139e-02
  -8.46276632e-02 -1.16886256e-01 -1.32736745e-01 -3.38682218e-02
   9.37884840e-02 -1.04565339e-01  5.26638164e-01  6.70370645e-02
  -1.35762617e-02 -1.82115223e-02 -7.17221226e-02]
 [-7.71254851e-01  3.20924224e+00  1.94337261e-01 -1.13085755e-01
   2.65683070e-01  3.55399872e-01  2.65034413e-01  1.31521885e-01
  -2.73609798e-01  2.91218875e-01 -1.91390719e+00 -2.35543943e-01
  -2.17237817e-01  1.68421520e-01  2.13072474e-01]
 [-5.88976288e-02  1.94337261e-01  1.68295522e-01 -1.36883490e-02
   2.93601160e-02  3.31682760e-02  4.11526355e-02  2.00308668e-02

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 8 finished in 0.223811 and 0.000626802 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.07226019  0.74429836 -0.07958217  0.96863259  0.065337   -0.53620035
 -0.21430176 -0.06862672 -0.36516991 -0.25003075  0.26759086  0.03988413
 -0.55666048 -0.02657775 -0.39709684]
update hyper-parameter sigma to
[[ 0.63611275 -0.8372035  -0.06136233  0.10561135 -0.12312611 -0.13046432
  -0.15567657 -0.03696113  0.13315507 -0.11129025  0.56237147  0.04888091
  -0.01960898 -0.01224088 -0.0959402 ]
 [-0.8372035   3.26790314  0.19809704 -0.06535306  0.39583252  0.38416267
   0.2958196   0.12806817 -0.37999846  0.33721489 -1.9943672  -0.20757742
  -0.24510664  0.1294896   0.26188906]
 [-0.06136233  0.19809704  0.15971133 -0.03345642  0.03309991  0.03360076
   0.04402936  0.01939712 -0.02915635  0.02635566 -0.184828   -0.02635345
   0.00420524  0.0068207   0.03742565]
 [ 0.10561135 -0.06535306 -0.03345642  1.41054358  0.22727408  0.00

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 9 finished in 0.2237 and 0.000372887 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.05620027  0.7214832  -0.07687908  0.96218771  0.05718275 -0.57651125
 -0.2176197  -0.04813607 -0.33848005 -0.27177612  0.21596214  0.04316511
 -0.56219857 -0.04435539 -0.41380545]
update hyper-parameter sigma to
[[ 0.66682454 -0.86659255 -0.05933606  0.13277008 -0.15382399 -0.13787116
  -0.17290668 -0.03564608  0.16478407 -0.11284662  0.57419182  0.02895389
  -0.02772389 -0.0074798  -0.11500688]
 [-0.86659255  3.2898291   0.19184787 -0.05449678  0.50159793  0.40477564
   0.32434814  0.12123032 -0.47399624  0.36605188 -2.02843779 -0.18977839
  -0.26004847  0.10037748  0.30470112]
 [-0.05933606  0.19184787  0.15255946 -0.04756393  0.03685145  0.03294878
   0.04700032  0.01779512 -0.03415368  0.02205286 -0.1728809  -0.02378118
   0.00890013  0.0071038   0.04182584]
 [ 0.13277008 -0.05449678 -0.04756393  1.41703683  0.2078012   0.0094

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 10 finished in 0.237171 and 0.000708818 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.05033957  0.70063949 -0.06888328  0.95608462  0.04486342 -0.61344986
 -0.22493877 -0.03090844 -0.31374946 -0.28813859  0.17591424  0.04413979
 -0.57108427 -0.05976053 -0.43303383]
update hyper-parameter sigma to
[[ 6.80882082e-01 -8.76817059e-01 -5.38345665e-02  1.52729978e-01
  -1.75694442e-01 -1.41404535e-01 -1.84859202e-01 -3.12930158e-02
   1.89172734e-01 -1.12385958e-01  5.75431841e-01  4.88312575e-03
  -3.64436354e-02 -3.17110366e-03 -1.29347963e-01]
 [-8.76817059e-01  3.29815235e+00  1.77482444e-01 -5.18424672e-02
   5.86240363e-01  4.20342864e-01  3.47921777e-01  1.11751257e-01
  -5.54017108e-01  3.85102572e-01 -2.04994277e+00 -1.69183239e-01
  -2.68297843e-01  7.77335963e-02  3.40615782e-01]
 [-5.38345665e-02  1.77482444e-01  1.46289544e-01 -5.69351174e-02
   3.91573958e-02  3.14436023e-02  4.97019647e-02  1.57801526e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 11 finished in 0.249325 and 0.000403166 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.05196384  0.68630539 -0.05775525  0.95527101  0.03277632 -0.64675306
 -0.23401268 -0.01857251 -0.29401183 -0.30021541  0.14189992  0.04358386
 -0.58411012 -0.07312336 -0.45380845]
update hyper-parameter sigma to
[[ 6.93231039e-01 -8.84001869e-01 -4.80635004e-02  1.69848685e-01
  -1.91978815e-01 -1.43982636e-01 -1.95739541e-01 -2.65661246e-02
   2.09041450e-01 -1.11753017e-01  5.74333448e-01 -2.15292657e-02
  -4.68839239e-02  1.44560768e-03 -1.41676190e-01]
 [-8.84001869e-01  3.29595100e+00  1.60737572e-01 -4.98407265e-02
   6.52676567e-01  4.31947417e-01  3.66643512e-01  1.01388702e-01
  -6.16140358e-01  3.95446100e-01 -2.06135893e+00 -1.47094916e-01
  -2.70007722e-01  5.87903767e-02  3.69930237e-01]
 [-4.80635004e-02  1.60737572e-01  1.41057281e-01 -6.33169983e-02
   4.02363864e-02  2.97649446e-02  5.25610362e-02  1.39058077e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 12 finished in 0.251772 and 0.000515223 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.0623456   0.68069642 -0.04375122  0.95121068  0.02135269 -0.67623216
 -0.24042615 -0.00971202 -0.27999305 -0.30727436  0.10702878  0.04617637
 -0.59843887 -0.08540813 -0.47365641]
update hyper-parameter sigma to
[[ 0.7019492  -0.88101986 -0.04188082  0.18390141 -0.2010242  -0.14457865
  -0.20487221 -0.02164635  0.22285868 -0.11050122  0.56213934 -0.04499437
  -0.05861105  0.00557141 -0.15132519]
 [-0.88101986  3.25952222  0.14247115 -0.0437164   0.69723341  0.43629171
   0.37730288  0.08832988 -0.65609653  0.39529897 -2.03784708 -0.13247254
  -0.26585198  0.04403574  0.38954965]
 [-0.04188082  0.14247115  0.13680034 -0.06734282  0.04016663  0.02794963
   0.05528489  0.01213448 -0.03862408  0.01127594 -0.142023   -0.02035963
   0.02215105  0.004468    0.05144458]
 [ 0.18390141 -0.0437164  -0.06734282  1.47894988  0.25030054  0.0

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 13 finished in 0.235815 and 0.000381947 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.08091269  0.68032491 -0.02780333  0.9390787   0.01019353 -0.70269165
 -0.24212035 -0.00278064 -0.27101855 -0.30690832  0.07330992  0.05036791
 -0.61270116 -0.09737654 -0.49172044]
update hyper-parameter sigma to
[[ 7.10153300e-01 -8.74448781e-01 -3.60294453e-02  1.96350290e-01
  -2.06149140e-01 -1.44792810e-01 -2.14772694e-01 -1.71758364e-02
   2.33734854e-01 -1.10361168e-01  5.49320251e-01 -6.58929667e-02
  -7.03656180e-02  9.66448548e-03 -1.60507456e-01]
 [-8.74448781e-01  3.20494686e+00  1.24840801e-01 -3.88597829e-02
   7.25587025e-01  4.36346952e-01  3.84390925e-01  7.27356867e-02
  -6.80260426e-01  3.89112244e-01 -1.99743191e+00 -1.22371077e-01
  -2.58062535e-01  3.29105724e-02  4.03421203e-01]
 [-3.60294453e-02  1.24840801e-01  1.33505491e-01 -6.96127086e-02
   3.95580785e-02  2.63003781e-02  5.80309837e-02  1.04058385e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 14 finished in 0.249344 and 0.000488043 seconds respectively with log likelihood nan
update hyper-parameter mu to [-1.06572911e-01  6.85164920e-01 -1.25318893e-02  9.24268770e-01
  7.06243867e-04 -7.26350528e-01 -2.38205949e-01  6.15430414e-03
 -2.66804830e-01 -3.00145067e-01  4.04419151e-02  5.01083535e-02
 -6.28183030e-01 -1.09606538e-01 -5.07955296e-01]
update hyper-parameter sigma to
[[ 7.20996422e-01 -8.73137678e-01 -3.19034354e-02  2.08107766e-01
  -2.11233056e-01 -1.46382891e-01 -2.27769617e-01 -1.26809533e-02
   2.45068629e-01 -1.11198426e-01  5.45086995e-01 -8.52757550e-02
  -8.13645223e-02  1.39884939e-02 -1.71781252e-01]
 [-8.73137678e-01  3.15735714e+00  1.10097536e-01 -3.81414933e-02
   7.44790749e-01  4.36063691e-01  3.91773642e-01  5.36218744e-02
  -6.96879160e-01  3.79060237e-01 -1.96072318e+00 -1.07462901e-01
  -2.48708521e-01  2.47711418e-02  4.15665499e-01]
 [-3.19034354e-02  1.10097536e-01  1.30776766e-01 -7.03721231e-02
  

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 15 finished in 0.25465 and 0.000607014 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.13807399  0.69335927  0.00104337  0.91233954 -0.00890996 -0.74794511
 -0.2296438   0.01741523 -0.26569508 -0.28908958  0.01093865  0.04264504
 -0.64511894 -0.12185528 -0.52376716]
update hyper-parameter sigma to
[[ 7.35998504e-01 -8.79413604e-01 -2.96829177e-02  2.20969771e-01
  -2.18958242e-01 -1.50148872e-01 -2.45746867e-01 -8.76332050e-03
   2.58817021e-01 -1.12156604e-01  5.52461308e-01 -1.03803607e-01
  -9.19828283e-02  1.88947970e-02 -1.87203300e-01]
 [-8.79413604e-01  3.12077623e+00  9.85415868e-02 -4.39205830e-02
   7.58377978e-01  4.36501253e-01  4.01836811e-01  3.27696919e-02
  -7.09818111e-01  3.64485157e-01 -1.93153791e+00 -8.28514669e-02
  -2.37366987e-01  1.85561038e-02  4.28713176e-01]
 [-2.96829177e-02  9.85415868e-02  1.28393961e-01 -7.07663523e-02
   3.71803131e-02  2.39499790e-02  6.25754291e-02  6.95358366e-0

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 16 finished in 0.245582 and 0.00036788 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.17462886  0.70308798  0.01292525  0.90444413 -0.02501072 -0.76883089
 -0.21902861  0.02749286 -0.26481962 -0.27478744 -0.00943502  0.0319105
 -0.6620636  -0.132991   -0.54158803]
update hyper-parameter sigma to
[[ 7.56369485e-01 -8.91993064e-01 -2.90559605e-02  2.37607830e-01
  -2.31697516e-01 -1.56362992e-01 -2.70409083e-01 -6.83410696e-03
   2.76250195e-01 -1.12719078e-01  5.71375205e-01 -1.21615052e-01
  -1.03178067e-01  2.50174880e-02 -2.08403532e-01]
 [-8.91993064e-01  3.08727414e+00  8.96592234e-02 -5.89564071e-02
   7.69613599e-01  4.37419579e-01  4.17014944e-01  1.63770710e-02
  -7.21185807e-01  3.44795700e-01 -1.90667621e+00 -5.26000965e-02
  -2.23703364e-01  1.24700062e-02  4.44803447e-01]
 [-2.90559605e-02  8.96592234e-02  1.26373167e-01 -7.19036778e-02
   3.65162914e-02  2.33107065e-02  6.50066058e-02  6.00172758e-03

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 17 finished in 0.241087 and 0.000399113 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.2162866   0.71372029  0.0235558   0.89779256 -0.04525073 -0.78873175
 -0.20511473  0.03648553 -0.26456911 -0.2580587  -0.02451193  0.01898025
 -0.67797719 -0.14348035 -0.55999667]
update hyper-parameter sigma to
[[ 7.81801955e-01 -9.08620654e-01 -2.94248762e-02  2.55395740e-01
  -2.49630106e-01 -1.64639577e-01 -3.00207409e-01 -6.87289836e-03
   2.97162352e-01 -1.12989589e-01  5.99173524e-01 -1.38314392e-01
  -1.13976182e-01  3.25482811e-02 -2.34557957e-01]
 [-9.08620654e-01  3.05543577e+00  8.27861318e-02 -7.65211742e-02
   7.79722551e-01  4.38782183e-01  4.34370370e-01  4.54361896e-03
  -7.31843230e-01  3.22565748e-01 -1.88579409e+00 -2.13974688e-02
  -2.10998358e-01  6.29705750e-03  4.63049274e-01]
 [-2.94248762e-02  8.27861318e-02  1.24632326e-01 -7.32377832e-02
   3.63389825e-02  2.29780632e-02  6.74982898e-02  5.56826915e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 18 finished in 0.238338 and 0.000675201 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.26091598  0.72101898  0.03323783  0.89137346 -0.06848029 -0.80814119
 -0.1885787   0.04476006 -0.26384253 -0.24216781 -0.0342704   0.00483345
 -0.69211661 -0.15339496 -0.57876925]
update hyper-parameter sigma to
[[ 8.12404271e-01 -9.29992954e-01 -3.05814743e-02  2.72161432e-01
  -2.73222490e-01 -1.75068853e-01 -3.33531473e-01 -8.48153742e-03
   3.21674514e-01 -1.13833856e-01  6.36537592e-01 -1.52869921e-01
  -1.23681962e-01  4.13280755e-02 -2.65237502e-01]
 [-9.29992954e-01  3.03569587e+00  7.78796950e-02 -9.15597749e-02
   7.93991468e-01  4.42504345e-01  4.52131579e-01 -3.81561591e-03
  -7.45142825e-01  3.04372950e-01 -1.87959037e+00  4.73278530e-03
  -2.02276721e-01 -3.15160412e-05  4.84597245e-01]
 [-3.05814743e-02  7.78796950e-02  1.23038225e-01 -7.43790302e-02
   3.64695553e-02  2.28904211e-02  6.96620373e-02  5.41102046e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 19 finished in 0.244246 and 0.000715733 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.30756875  0.72569187  0.04214314  0.88583532 -0.0926769  -0.82687654
 -0.17064304  0.0524932  -0.26274261 -0.22722504 -0.04178995 -0.01075875
 -0.70532709 -0.1628885  -0.59770015]
update hyper-parameter sigma to
[[ 8.47157217e-01 -9.53042538e-01 -3.21334809e-02  2.87709312e-01
  -3.00082919e-01 -1.86706234e-01 -3.68084695e-01 -1.11111584e-02
   3.48322814e-01 -1.15171552e-01  6.78133312e-01 -1.65984486e-01
  -1.32549720e-01  5.10044455e-02 -2.98790597e-01]
 [-9.53042538e-01  3.02172137e+00  7.43138220e-02 -1.04490124e-01
   8.09023377e-01  4.47047678e-01  4.67233620e-01 -9.88534309e-03
  -7.58820870e-01  2.89454566e-01 -1.87912251e+00  2.65412582e-02
  -1.96715916e-01 -5.95136391e-03  5.07184314e-01]
 [-3.21334809e-02  7.43138220e-02  1.21506194e-01 -7.51440815e-02
   3.66048894e-02  2.29032502e-02  7.11753533e-02  5.39131836e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 20 finished in 0.245646 and 0.000458956 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.35534527  0.72811737  0.05029368  0.88124427 -0.11722913 -0.84508923
 -0.15277458  0.05962153 -0.26084481 -0.21315529 -0.04748795 -0.0275363
 -0.71813467 -0.17195262 -0.61713154]
update hyper-parameter sigma to
[[ 8.85283882e-01 -9.76131797e-01 -3.38160455e-02  3.02000190e-01
  -3.28495193e-01 -1.98925336e-01 -4.02276731e-01 -1.44000628e-02
   3.76196093e-01 -1.16897506e-01  7.20611143e-01 -1.78305387e-01
  -1.40695334e-01  6.13039962e-02 -3.33988924e-01]
 [-9.76131797e-01  3.01040433e+00  7.17395268e-02 -1.15988274e-01
   8.23089449e-01  4.51679925e-01  4.78614394e-01 -1.43219876e-02
  -7.71957608e-01  2.76877423e-01 -1.88015846e+00  4.49350909e-02
  -1.93527128e-01 -1.11907372e-02  5.29711212e-01]
 [-3.38160455e-02  7.17395268e-02  1.19994524e-01 -7.54281539e-02
   3.66333618e-02  2.29475798e-02  7.19239515e-02  5.42817267e-0

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 21 finished in 0.244584 and 0.000372887 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.40356347  0.72841009  0.05768652  0.87739091 -0.14195143 -0.86298946
 -0.13597347  0.06606866 -0.25757734 -0.19995775 -0.05119745 -0.04511452
 -0.73073386 -0.18058265 -0.63739203]
update hyper-parameter sigma to
[[ 9.26300677e-01 -9.98762279e-01 -3.54958908e-02  3.15174335e-01
  -3.57465895e-01 -2.11419629e-01 -4.35309512e-01 -1.81212809e-02
   4.04969907e-01 -1.18970490e-01  7.62406642e-01 -1.90286315e-01
  -1.48209385e-01  7.20439418e-02 -3.70159057e-01]
 [-9.98762279e-01  3.00113780e+00  6.99866441e-02 -1.26813028e-01
   8.35654258e-01  4.56249216e-01  4.86497474e-01 -1.75343682e-02
  -7.84678701e-01  2.66155298e-01 -1.88150652e+00  6.06323937e-02
  -1.92021918e-01 -1.56753076e-02  5.51950711e-01]
 [-3.54958908e-02  6.99866441e-02  1.18483209e-01 -7.52166471e-02
   3.65386986e-02  2.30027229e-02  7.19612639e-02  5.47686629e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 22 finished in 0.245826 and 0.000372887 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.45183338  0.72664336  0.06432454  0.87396756 -0.16670601 -0.88074211
 -0.12076547  0.07178895 -0.25235224 -0.18763078 -0.05285979 -0.06314241
 -0.74313386 -0.18881158 -0.65866512]
update hyper-parameter sigma to
[[ 9.69959096e-01 -1.02100292e+00 -3.71321918e-02  3.27554201e-01
  -3.86422929e-01 -2.24085495e-01 -4.67036841e-01 -2.21429870e-02
   4.34765655e-01 -1.21391974e-01  8.02943023e-01 -2.02285245e-01
  -1.55237871e-01  8.31249259e-02 -4.07069646e-01]
 [-1.02100292e+00  2.99419896e+00  6.89777212e-02 -1.37818060e-01
   8.46533909e-01  4.60829133e-01  4.91748600e-01 -1.97938265e-02
  -7.97616245e-01  2.57001553e-01 -1.88296830e+00  7.43827363e-02
  -1.91529265e-01 -1.94282008e-02  5.74114606e-01]
 [-3.71321918e-02  6.89777212e-02  1.16967012e-01 -7.45547155e-02
   3.63521691e-02  2.30781850e-02  7.14475873e-02  5.51979791e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 23 finished in 0.249495 and 0.00039196 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.49995867  0.72284208  0.07021404  0.8706647  -0.19136825 -0.89848241
 -0.10742513  0.07675664 -0.24453187 -0.17617045 -0.05248367 -0.08133876
 -0.75525359 -0.19668858 -0.68106163]
update hyper-parameter sigma to
[[ 1.01621409e+00 -1.04323974e+00 -3.87478776e-02  3.39560830e-01
  -4.15029486e-01 -2.36954153e-01 -4.97763329e-01 -2.63995536e-02
   4.66045101e-01 -1.24189369e-01  8.42200536e-01 -2.14626528e-01
  -1.61987159e-01  9.45079259e-02 -4.44815683e-01]
 [-1.04323974e+00  2.99028500e+00  6.86960146e-02 -1.49824582e-01
   8.55671719e-01  4.65612892e-01  4.95525728e-01 -2.12737018e-02
  -8.11728716e-01  2.49250947e-01 -1.88478727e+00  8.69573628e-02
  -1.91448246e-01 -2.25218371e-02  5.96689248e-01]
 [-3.87478776e-02  6.86960146e-02  1.15451304e-01 -7.35195792e-02
   3.61340361e-02  2.32040447e-02  7.05952432e-02  5.55739084e-0

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 24 finished in 0.255464 and 0.000374079 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.54788158  0.71715922  0.0753632   0.86742305 -0.21564585 -0.91625428
 -0.09602193  0.08099201 -0.23375213 -0.16552597 -0.0503994  -0.09956982
 -0.7670964  -0.2043087  -0.70458329]
update hyper-parameter sigma to
[[ 1.06510919e+00 -1.06584345e+00 -4.03942454e-02  3.51637598e-01
  -4.42986158e-01 -2.50085602e-01 -5.27925056e-01 -3.08496861e-02
   4.99264608e-01 -1.27373919e-01  8.80290519e-01 -2.27610262e-01
  -1.68695208e-01  1.06167726e-01 -4.83611452e-01]
 [-1.06584345e+00  2.98989365e+00  6.91246795e-02 -1.63317077e-01
   8.63050172e-01  4.70776218e-01  4.98829655e-01 -2.20934628e-02
  -8.27805921e-01  2.42774771e-01 -1.88724832e+00  9.89522767e-02
  -1.91418763e-01 -2.50435806e-02  6.20138733e-01]
 [-4.03942454e-02  6.91246795e-02  1.13946219e-01 -7.22130350e-02
   3.59354987e-02  2.34090215e-02  6.96023575e-02  5.59518788e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 25 finished in 0.247916 and 0.000338793 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.59596443  0.71083738  0.07981162  0.86544175 -0.2382197  -0.93359889
 -0.0857696   0.08473201 -0.22181616 -0.15537343 -0.04873106 -0.11815029
 -0.77920872 -0.21203898 -0.72858006]
update hyper-parameter sigma to
[[ 1.11635882e+00 -1.08796987e+00 -4.20427954e-02  3.64168030e-01
  -4.69723238e-01 -2.63213532e-01 -5.57199898e-01 -3.53559093e-02
   5.33237731e-01 -1.30762969e-01  9.16546948e-01 -2.41262388e-01
  -1.75508570e-01  1.17985889e-01 -5.23104437e-01]
 [-1.08796987e+00  2.99075989e+00  6.99954064e-02 -1.77186081e-01
   8.68757540e-01  4.75922717e-01  5.00914185e-01 -2.24627269e-02
  -8.43922731e-01  2.37191863e-01 -1.88965732e+00  1.09718494e-01
  -1.92092920e-01 -2.70382519e-02  6.43598596e-01]
 [-4.20427954e-02  6.99954064e-02  1.12449995e-01 -7.08048413e-02
   3.56864005e-02  2.36358346e-02  6.84663435e-02  5.60949844e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 26 finished in 0.246241 and 0.000505209 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.64530054  0.70671017  0.08370512  0.86721409 -0.25679267 -0.94935541
 -0.07443145  0.0885004  -0.21410647 -0.14504718 -0.05165098 -0.13758592
 -0.79254847 -0.22057231 -0.75128138]
update hyper-parameter sigma to
[[ 1.16894209e+00 -1.10659947e+00 -4.34336880e-02  3.77041939e-01
  -4.94684046e-01 -2.75469033e-01 -5.83674106e-01 -3.96076569e-02
   5.63549633e-01 -1.33875324e-01  9.49276816e-01 -2.54766793e-01
  -1.82112635e-01  1.29745459e-01 -5.61675434e-01]
 [-1.10659947e+00  2.98589986e+00  7.05513223e-02 -1.87815249e-01
   8.73311292e-01  4.79622297e-01  4.97985693e-01 -2.28171117e-02
  -8.53234097e-01  2.31649344e-01 -1.88970997e+00  1.16486398e-01
  -1.95588159e-01 -2.84785135e-02  6.63640326e-01]
 [-4.34336880e-02  7.05513223e-02  1.10926030e-01 -6.94770437e-02
   3.51271451e-02  2.36660350e-02  6.67957939e-02  5.52397228e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 27 finished in 0.278705 and 0.000668764 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.6958858   0.70377668  0.0872169   0.86984268 -0.27385318 -0.96406018
 -0.06192293  0.09217342 -0.20996998 -0.13479285 -0.05615167 -0.15620481
 -0.80550948 -0.22949771 -0.7727264 ]
update hyper-parameter sigma to
[[ 1.22322652e+00 -1.12257474e+00 -4.44585111e-02  3.89001251e-01
  -5.19539153e-01 -2.87145093e-01 -6.07573024e-01 -4.36754536e-02
   5.90560290e-01 -1.36884378e-01  9.80363975e-01 -2.67440967e-01
  -1.87918078e-01  1.41756740e-01 -5.99462007e-01]
 [-1.12257474e+00  2.97663629e+00  7.07235288e-02 -1.95113400e-01
   8.77548480e-01  4.82171946e-01  4.90781293e-01 -2.32048169e-02
  -8.57116741e-01  2.26257320e-01 -1.88821758e+00  1.20142451e-01
  -2.01091495e-01 -2.95501320e-02  6.80499949e-01]
 [-4.44585111e-02  7.07235288e-02  1.09364230e-01 -6.80017737e-02
   3.43034209e-02  2.35050460e-02  6.45699058e-02  5.34706614e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 28 finished in 0.260568 and 0.000577927 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.74759399  0.70121079  0.09039003  0.87229646 -0.2904664  -0.97810167
 -0.04866176  0.09564722 -0.20821124 -0.1247784  -0.06089639 -0.17357179
 -0.81763206 -0.23858317 -0.79325045]
update hyper-parameter sigma to
[[ 1.27951945e+00 -1.13681690e+00 -4.51085412e-02  3.99581314e-01
  -5.45261541e-01 -2.98553987e-01 -6.29392799e-01 -4.76470563e-02
   6.15355783e-01 -1.39944020e-01  1.01100462e+00 -2.79073181e-01
  -1.92760570e-01  1.54217213e-01 -6.36777014e-01]
 [-1.13681690e+00  2.96499109e+00  7.06253837e-02 -2.00328711e-01
   8.81611595e-01  4.83981026e-01  4.80595940e-01 -2.36082492e-02
  -8.57753357e-01  2.21182419e-01 -1.88560119e+00  1.22008061e-01
  -2.07463511e-01 -3.03144112e-02  6.94982613e-01]
 [-4.51085412e-02  7.06253837e-02  1.07775861e-01 -6.63657008e-02
   3.32575116e-02  2.31917977e-02  6.18811860e-02  5.09686774e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 29 finished in 0.274424 and 0.000521898 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.80043361  0.69865832  0.09322589  0.8745303  -0.30677626 -0.99164369
 -0.03494188  0.09888451 -0.20810204 -0.11506124 -0.06565265 -0.18976338
 -0.82897206 -0.24777529 -0.81307307]
update hyper-parameter sigma to
[[ 1.33788012e+00 -1.14974330e+00 -4.53858291e-02  4.08621707e-01
  -5.72279867e-01 -3.09861649e-01 -6.49406262e-01 -5.15664851e-02
   6.38620018e-01 -1.43115492e-01  1.04177348e+00 -2.89510638e-01
  -1.96580303e-01  1.67215738e-01 -6.73741926e-01]
 [-1.14974330e+00  2.95221855e+00  7.03513051e-02 -2.04392767e-01
   8.85470124e-01  4.85303292e-01  4.68254059e-01 -2.40247040e-02
  -8.56479983e-01  2.16491158e-01 -1.88210519e+00  1.22814626e-01
  -2.14050296e-01 -3.07584511e-02  7.07620593e-01]
 [-4.53858291e-02  7.03513051e-02  1.06175621e-01 -6.46305584e-02
   3.20069799e-02  2.27543993e-02  5.88164506e-02  4.78648026e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 30 finished in 0.289698 and 0.000582933 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.85447031  0.69593571  0.09572318  0.87661778 -0.32274986 -1.0047801
 -0.02094979  0.10187046 -0.20914922 -0.10566193 -0.07037933 -0.20488983
 -0.83962132 -0.25706782 -0.83233356]
update hyper-parameter sigma to
[[ 1.39831896e+00 -1.16156666e+00 -4.52926220e-02  4.16014324e-01
  -6.00820525e-01 -3.21169011e-01 -6.67780557e-01 -5.54552474e-02
   6.60784932e-01 -1.46417295e-01  1.07304848e+00 -2.98593646e-01
  -1.99318469e-01  1.80801275e-01 -7.10388463e-01]
 [-1.16156666e+00  2.93913197e+00  6.99676884e-02 -2.07850972e-01
   8.89077866e-01  4.86304624e-01  4.54271846e-01 -2.44606085e-02
  -8.54108423e-01  2.12197198e-01 -1.87796345e+00  1.22945968e-01
  -2.20518547e-01 -3.08523957e-02  7.18751182e-01]
 [-4.52926220e-02  6.99676884e-02  1.04577056e-01 -6.28499355e-02
   3.05618374e-02  2.22139654e-02  5.54489451e-02  4.42612905e-0

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 31 finished in 0.267494 and 0.00044322 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.9097666   0.69294212  0.09788482  0.87862715 -0.3383285  -1.01757721
 -0.00682735  0.10459989 -0.21099644 -0.09658672 -0.07506464 -0.2190509
 -0.84965396 -0.26646368 -0.85112841]
update hyper-parameter sigma to
[[ 1.46088521e+00 -1.17242178e+00 -4.48355286e-02  4.21709188e-01
  -6.30986445e-01 -3.32543629e-01 -6.84654892e-01 -5.93206817e-02
   6.82137576e-01 -1.49845916e-01  1.10509299e+00 -3.06197701e-01
  -2.00949110e-01  1.94998918e-01 -7.46719803e-01]
 [-1.17242178e+00  2.92625755e+00  6.95203983e-02 -2.11018810e-01
   8.92374899e-01  4.87096664e-01  4.38999006e-01 -2.49279627e-02
  -8.51122416e-01  2.08286929e-01 -1.87336491e+00  1.22618205e-01
  -2.26692023e-01 -3.05637165e-02  7.28591770e-01]
 [-4.48355286e-02  6.95203983e-02  1.02992058e-01 -6.10651467e-02
   2.89294247e-02  2.15871350e-02  5.18443726e-02  4.02423210e-03

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 32 finished in 0.277208 and 0.000891924 seconds respectively with log likelihood nan
update hyper-parameter mu to [-0.9663441   0.68962212  0.09971789  0.88061357 -0.35346742 -1.03008832
  0.00728305  0.10707084 -0.21337479 -0.08783586 -0.0796985  -0.23234207
 -0.85913759 -0.27596043 -0.86953929]
update hyper-parameter sigma to
[[ 1.52571043e+00 -1.18244267e+00 -4.40304990e-02  4.25754453e-01
  -6.62777343e-01 -3.44036627e-01 -7.00188833e-01 -6.31615019e-02
   7.02893274e-01 -1.53388044e-01  1.13808089e+00 -3.12282071e-01
  -2.01507369e-01  2.09815779e-01 -7.82751559e-01]
 [-1.18244267e+00  2.91394586e+00  6.90428610e-02 -2.14088893e-01
   8.95309188e-01  4.87759360e-01  4.22720398e-01 -2.54385723e-02
  -8.47814532e-01  2.04733677e-01 -1.86847403e+00  1.21973133e-01
  -2.32463907e-01 -2.98704504e-02  7.37306067e-01]
 [-4.40304990e-02  6.90428610e-02  1.01430901e-01 -5.93081930e-02
   2.71188642e-02  2.08882823e-02  4.80668310e-02  3.58836090e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 33 finished in 0.280915 and 0.000526905 seconds respectively with log likelihood nan
update hyper-parameter mu to [-1.02415571  0.68594524  0.10123325  0.88261167 -0.36814244 -1.04236227
  0.02120896  0.10928131 -0.21606892 -0.07940915 -0.08424961 -0.24485272
 -0.8681366  -0.28553976 -0.8876507 ]
update hyper-parameter sigma to
[[ 1.59301612e+00 -1.19181904e+00 -4.29072763e-02  4.28330395e-01
  -6.96083000e-01 -3.55689570e-01 -7.14578238e-01 -6.69695975e-02
   7.23239719e-01 -1.57027474e-01  1.17210018e+00 -3.16917536e-01
  -2.01114704e-01  2.25237522e-01 -8.18536015e-01]
 [-1.19181904e+00  2.90243797e+00  6.85593721e-02 -2.17190080e-01
   8.97838261e-01  4.88358619e-01  4.05730465e-01 -2.59973827e-02
  -8.44379649e-01  2.01509310e-01 -1.86343877e+00  1.21120437e-01
  -2.37759131e-01 -2.87734959e-02  7.45049075e-01]
 [-4.29072763e-02  6.85593721e-02  9.99022442e-02 -5.76052804e-02
   2.51427863e-02  2.01314478e-02  4.41848105e-02  3.12625966e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 34 finished in 0.268345 and 0.00047493 seconds respectively with log likelihood nan
update hyper-parameter mu to [-1.08306967  0.6819007   0.10244676  0.88463977 -0.38235229 -1.05444501
  0.03474591  0.11122806 -0.21890616 -0.07130843 -0.08867698 -0.25667858
 -0.87671634 -0.29516694 -0.90555172]
update hyper-parameter sigma to
[[ 1.66308119e+00 -1.20081423e+00 -4.15109254e-02  4.29755806e-01
  -7.30676148e-01 -3.67538119e-01 -7.28040616e-01 -7.07285160e-02
   7.43352998e-01 -1.60749292e-01  1.20714721e+00 -3.20306569e-01
  -1.99975744e-01  2.41216721e-01 -8.54158801e-01]
 [-1.20081423e+00  2.89189945e+00  6.80872843e-02 -2.20412418e-01
   8.99943878e-01  4.88957115e-01  3.88354374e-01 -2.66022334e-02
  -8.40962378e-01  1.98590111e-01 -1.85840374e+00  1.20157820e-01
  -2.42519556e-01 -2.72969853e-02  7.51986333e-01]
 [-4.15109254e-02  6.80872843e-02  9.84128173e-02 -5.59768899e-02
   2.30209341e-02  1.93310822e-02  4.02708749e-02  2.64626578e-0

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 35 finished in 0.26657 and 0.000488043 seconds respectively with log likelihood nan
update hyper-parameter mu to [-1.14287901  0.67749306  0.10337806  0.88669327 -0.39610963 -1.06638078
  0.04768063  0.11290735 -0.22175321 -0.06353665 -0.09293288 -0.26790953
 -0.88493729 -0.3047954  -0.92332735]
update hyper-parameter sigma to
[[ 1.73617826e+00 -1.20975710e+00 -3.98988059e-02  4.30454301e-01
  -7.66223979e-01 -3.79604933e-01 -7.40777319e-01 -7.44126012e-02
   7.63391817e-01 -1.64539726e-01  1.24312430e+00 -3.22760750e-01
  -1.98355792e-01  2.57669326e-01 -8.89718194e-01]
 [-1.20975710e+00  2.88242656e+00  6.76355091e-02 -2.23816482e-01
   9.01627183e-01  4.89611061e-01  3.70939552e-01 -2.72435866e-02
  -8.37677337e-01  1.95958346e-01 -1.85349491e+00  1.19178034e-01
  -2.46700963e-01 -2.54920407e-02  7.58293052e-01]
 [-3.98988059e-02  6.76355091e-02  9.69673302e-02 -5.44387569e-02
   2.07798862e-02  1.85020560e-02  3.63988435e-02  2.15755415e-0

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 36 finished in 0.259811 and 0.000418186 seconds respectively with log likelihood nan
update hyper-parameter mu to [-1.20332532  0.67274324  0.10405076  0.88874639 -0.40943878 -1.07820506
  0.05982303  0.11431692 -0.22451964 -0.05609719 -0.0969798  -0.27862858
 -0.89285125 -0.31437259 -0.94104838]
update hyper-parameter sigma to
[[ 1.81248906e+00 -1.21899787e+00 -3.81338613e-02  4.30878361e-01
  -8.02322502e-01 -3.91895303e-01 -7.52934490e-01 -7.79873901e-02
   7.83480739e-01 -1.68383082e-01  1.27985780e+00 -3.24639488e-01
  -1.96527294e-01  2.74476799e-01 -9.25288017e-01]
 [-1.21899787e+00  2.87405792e+00  6.72042455e-02 -2.27426092e-01
   9.02914787e-01  4.90370198e-01  3.53823230e-01 -2.79080207e-02
  -8.34617087e-01  1.93602418e-01 -1.84882079e+00  1.18260086e-01
  -2.50289277e-01 -2.34338937e-02  7.64140885e-01]
 [-3.81338613e-02  6.72042455e-02  9.55681841e-02 -5.29999597e-02
   1.84530845e-02  1.76587381e-02  3.26369622e-02  1.66957784e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 37 finished in 0.250982 and 0.000370979 seconds respectively with log likelihood nan
update hyper-parameter mu to [-1.26414413  0.66769002  0.10449041  0.89076075 -0.4223613  -1.08994302
  0.07104178  0.11545865 -0.22715675 -0.04899035 -0.10080327 -0.28891014
 -0.90049746 -0.32385087 -0.95875514]
update hyper-parameter sigma to
[[ 1.89205965e+00 -1.22886369e+00 -3.62771399e-02  4.31432617e-01
  -8.38552261e-01 -4.04400103e-01 -7.64583565e-01 -8.14127554e-02
   8.03696890e-01 -1.72260393e-01  1.31713996e+00 -3.26283385e-01
  -1.94723084e-01  2.91494146e-01 -9.60897508e-01]
 [-1.22886369e+00  2.86675871e+00  6.67858562e-02 -2.31231070e-01
   9.03847522e-01  4.91274039e-01  3.37293274e-01 -2.85787489e-02
  -8.31846269e-01  1.91511864e-01 -1.84445308e+00  1.17457291e-01
  -2.53300828e-01 -2.12059899e-02  7.69681043e-01]
 [-3.62771399e-02  6.67858562e-02  9.42156225e-02 -5.16638981e-02
   1.60777501e-02  1.68143293e-02  2.90416807e-02  1.19148652e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 38 finished in 0.269486 and 0.00089097 seconds respectively with log likelihood nan
update hyper-parameter mu to [-1.32508966  0.66238061  0.10472213  0.89269284 -0.43489838 -1.10160871
  0.08127697  0.11633907 -0.22965392 -0.04221322 -0.10441297 -0.29881561
 -0.90790395 -0.33319372 -0.97645881]
update hyper-parameter sigma to
[[ 1.97477176e+00 -1.23961140e+00 -3.43803598e-02  4.32411573e-01
  -8.74526554e-01 -4.17091150e-01 -7.75720268e-01 -8.46467806e-02
   8.24060393e-01 -1.76148326e-01  1.35475032e+00 -3.27959706e-01
  -1.93093779e-01  3.08568366e-01 -9.96507555e-01]
 [-1.23961140e+00  2.86044814e+00  6.63664935e-02 -2.35196723e-01
   9.04474971e-01  4.92345194e-01  3.21560298e-01 -2.92414422e-02
  -8.29399653e-01  1.89677050e-01 -1.84042294e+00  1.16808871e-01
  -2.55785793e-01 -1.88935788e-02  7.75027854e-01]
 [-3.43803598e-02  6.63664935e-02  9.29081144e-02 -5.04285143e-02
   1.36903930e-02  1.59792691e-02  2.56535082e-02  7.31200821e-0

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 39 finished in 0.260125 and 0.000394106 seconds respectively with log likelihood nan
update hyper-parameter mu to [-1.38595701  0.65686573  0.10476964  0.89450704 -0.44707182 -1.11320565
  0.09053324  0.11696802 -0.23202456 -0.03575911 -0.10783611 -0.30839758
 -0.91508911 -0.34237547 -0.99414656]
update hyper-parameter sigma to
[[ 2.06036941e+00 -1.25140889e+00 -3.24824034e-02  4.33976059e-01
  -9.09925886e-01 -4.29929705e-01 -7.86281527e-01 -8.76496458e-02
   8.44542964e-01 -1.80021155e-01  1.39248059e+00 -3.29851911e-01
  -1.91699322e-01  3.25553602e-01 -1.03201057e+00]
 [-1.25140889e+00  2.85500809e+00  6.59292557e-02 -2.39271281e-01
   9.04845596e-01  4.93591523e-01  3.06746936e-01 -2.98853519e-02
  -8.27285850e-01  1.88086735e-01 -1.83671858e+00  1.16346478e-01
  -2.57812527e-01 -1.65738689e-02  7.80250714e-01]
 [-3.24824034e-02  6.59292557e-02  9.16429856e-02 -4.92880983e-02
   1.13231344e-02  1.51613869e-02  2.24965001e-02  2.94901915e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 40 finished in 0.249387 and 0.000583887 seconds respectively with log likelihood nan
update hyper-parameter mu to [-1.44658698  0.65119282  0.10465226  0.89617995 -0.45889487 -1.12473263
  0.09887103  0.1173587  -0.23429763 -0.02961848 -0.11111608 -0.31769799
 -0.92206551 -0.35138522 -1.01179004]
update hyper-parameter sigma to
[[ 2.14849018e+00 -1.26432861e+00 -3.06097078e-02  4.36159979e-01
  -9.44499681e-01 -4.42868673e-01 -7.96160845e-01 -9.03864746e-02
   8.65071890e-01 -1.83849800e-01  1.43014695e+00 -3.32041316e-01
  -1.90518644e-01  3.42313386e-01 -1.06724311e+00]
 [-1.26432861e+00  2.85031059e+00  6.54581150e-02 -2.43397479e-01
   9.04998740e-01  4.95010010e-01  2.92893015e-01 -3.05043326e-02
  -8.25492949e-01  1.86726247e-01 -1.83330323e+00  1.16084438e-01
  -2.59467143e-01 -1.43040273e-02  7.85378109e-01]
 [-3.06097078e-02  6.54581150e-02  9.04168172e-02 -4.82338388e-02
   9.00233321e-03  1.43656079e-02  1.95797265e-02 -1.13196324e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 41 finished in 0.259612 and 0.000441074 seconds respectively with log likelihood nan
update hyper-parameter mu to [-1.50685203  0.6453973   0.10438547  0.89770943 -0.47038414 -1.13618537
  0.10637667  0.11752536 -0.23650231 -0.02378051 -0.11429715 -0.32675811
 -0.92884599 -0.36021857 -1.02936218]
update hyper-parameter sigma to
[[ 2.23870971e+00 -1.27837637e+00 -2.87777563e-02  4.38899632e-01
  -9.78065637e-01 -4.55855754e-01 -8.05221333e-01 -9.28271884e-02
   8.85548631e-01 -1.87606510e-01  1.46758133e+00 -3.34547866e-01
  -1.89470334e-01  3.58727216e-01 -1.10199159e+00]
 [-1.27837637e+00  2.84624673e+00  6.49396546e-02 -2.47526371e-01
   9.04965618e-01  4.96590408e-01  2.79970048e-01 -3.10968452e-02
  -8.24001141e-01  1.85581135e-01 -1.83012036e+00  1.16042947e-01
  -2.60832222e-01 -1.21248235e-02  7.90415673e-01]
 [-2.87777563e-02  6.49396546e-02  8.92257989e-02 -4.72553717e-02
   6.74755278e-03  1.35947374e-02  1.69010763e-02 -4.90780233e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 42 finished in 0.247499 and 0.000479937 seconds respectively with log likelihood nan
update hyper-parameter mu to [-1.56665032  0.63950211  0.10398089  0.89911121 -0.48155611 -1.14756055
  0.11315354  0.11748195 -0.23866298 -0.01823461 -0.11741796 -0.33561634
 -0.93544427 -0.36887569 -1.04684187]
update hyper-parameter sigma to
[[ 2.33055694e+00 -1.29350027e+00 -2.69935936e-02  4.42053878e-01
  -1.01047916e+00 -4.68833587e-01 -8.13292428e-01 -9.49443150e-02
   9.05844403e-01 -1.91262632e-01  1.50461921e+00 -3.37343318e-01
  -1.88429829e-01  3.74679740e-01 -1.13599392e+00]
 [-1.29350027e+00  2.84272533e+00  6.43641893e-02 -2.51617246e-01
   9.04765321e-01  4.98315740e-01  2.67896499e-01 -3.16662392e-02
  -8.22779015e-01  1.84635109e-01 -1.82710113e+00  1.16243745e-01
  -2.61988402e-01 -1.00571741e-02  7.95341297e-01]
 [-2.69935936e-02  6.43641893e-02  8.80660892e-02 -4.63410301e-02
   4.57189590e-03  1.28497348e-02  1.44501456e-02 -8.37163132e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 43 finished in 0.26857 and 0.000490904 seconds respectively with log likelihood nan
update hyper-parameter mu to [-1.62589584  0.63351564  0.10344612  0.90042528 -0.49242257 -1.15885614
  0.11931136  0.11724185 -0.24079599 -0.01297099 -0.12051883 -0.34431611
 -0.94187828 -0.37736029 -1.06422621]
update hyper-parameter sigma to
[[ 2.42353034e+00 -1.30962424e+00 -2.52579410e-02  4.45422036e-01
  -1.04162124e+00 -4.81737747e-01 -8.20165291e-01 -9.67106637e-02
   9.25807728e-01 -1.94790998e-01  1.54109779e+00 -3.40368759e-01
  -1.87238165e-01  3.90055748e-01 -1.16893059e+00]
 [-1.30962424e+00  2.83969427e+00  6.37259766e-02 -2.55640104e-01
   9.04407540e-01  5.00170174e-01  2.56552612e-01 -3.22185366e-02
  -8.21798065e-01  1.83874233e-01 -1.82418298e+00  1.16717447e-01
  -2.63006707e-01 -8.10795526e-03  8.00119311e-01]
 [-2.52579410e-02  6.37259766e-02  8.69338542e-02 -4.54784072e-02
   2.48273870e-03  1.21299569e-02  1.22106061e-02 -1.15305166e-0

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 44 finished in 0.275621 and 0.000393867 seconds respectively with log likelihood nan
update hyper-parameter mu to [-1.68450539  0.62743031  0.10278507  0.9017178  -0.50300111 -1.17007426
  0.12495479  0.11681612 -0.24290239 -0.00798247 -0.12362761 -0.35290841
 -0.94817022 -0.38567565 -1.08153979]
update hyper-parameter sigma to
[[ 2.51707810e+00 -1.32664748e+00 -2.35652870e-02  4.48741775e-01
  -1.07136755e+00 -4.94494373e-01 -8.25572075e-01 -9.80945019e-02
   9.45249024e-01 -1.98163089e-01  1.57682696e+00 -3.43550151e-01
  -1.85704560e-01  4.04726050e-01 -1.20040072e+00]
 [-1.32664748e+00  2.83712483e+00  6.30225906e-02 -2.59574939e-01
   9.03886619e-01  5.02136565e-01  2.45784275e-01 -3.27640404e-02
  -8.21024898e-01  1.83284952e-01 -1.82129636e+00  1.17506158e-01
  -2.63948736e-01 -6.26512282e-03  8.04700633e-01]
 [-2.35652870e-02  6.30225906e-02  8.58252620e-02 -4.46536602e-02
   4.82238582e-04  1.14337073e-02  1.01616818e-02 -1.44025926e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 45 finished in 0.280553 and 0.000601768 seconds respectively with log likelihood nan
update hyper-parameter mu to [-1.74238381  0.62121796  0.10199761  0.90309388 -0.51330918 -1.18122394
  0.13018223  0.11621296 -0.24496699 -0.00326571 -0.12676343 -0.36145799
 -0.95435379 -0.39382282 -1.09885256]
update hyper-parameter sigma to
[[ 2.61055097e+00 -1.34446184e+00 -2.19040382e-02  4.51673781e-01
  -1.09955249e+00 -5.07005817e-01 -8.29143535e-01 -9.90525997e-02
   9.63919826e-01 -2.01346244e-01  1.61155295e+00 -3.46802282e-01
  -1.83589241e-01  4.18525296e-01 -1.22985790e+00]
 [-1.34446184e+00  2.83503394e+00  6.22535266e-02 -2.63409937e-01
   9.03179043e-01  5.04197869e-01  2.35402834e-01 -3.33164166e-02
  -8.20425187e-01  1.82857322e-01 -1.81836431e+00  1.18672356e-01
  -2.64871992e-01 -4.50244680e-03  8.09016905e-01]
 [-2.19040382e-02  6.22535266e-02  8.47362111e-02 -4.38503681e-02
  -1.43221160e-03  1.07576172e-02  8.27818835e-03 -1.70171679e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 46 finished in 0.260278 and 0.000575066 seconds respectively with log likelihood nan
update hyper-parameter mu to [-1.79941339e+00  6.14818998e-01  1.01077859e-01  9.04720127e-01
 -5.23365996e-01 -1.19232194e+00  1.35088616e-01  1.15437033e-01
 -2.46948248e-01  1.17704058e-03 -1.29936867e-01 -3.70065433e-01
 -9.60478579e-01 -4.01801346e-01 -1.11630499e+00]
update hyper-parameter sigma to
[[ 2.70312159e+00 -1.36296156e+00 -2.02548169e-02  4.53750078e-01
  -1.12592795e+00 -5.19137213e-01 -8.30338681e-01 -9.95190640e-02
   9.81468492e-01 -2.04300217e-01  1.64491953e+00 -3.50019419e-01
  -1.80559421e-01  4.31219157e-01 -1.25651395e+00]
 [-1.36296156e+00  2.83349045e+00  6.14201169e-02 -2.67141095e-01
   9.02242013e-01  5.06338430e-01  2.25172303e-01 -3.38954836e-02
  -8.19961746e-01  1.82587419e-01 -1.81529746e+00  1.20313326e-01
  -2.65837569e-01 -2.77172380e-03  8.12980537e-01]
 [-2.02548169e-02  6.14201169e-02  8.36622835e-02 -4.30481897e-02
  

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 47 finished in 0.259502 and 0.000403881 seconds respectively with log likelihood nan
update hyper-parameter mu to [-1.85539726  0.60812572  0.10001108  0.90688067 -0.5332001  -1.20340233
  0.13975315  0.11448508 -0.24875642  0.00533189 -0.13313648 -0.37888905
 -0.96662754 -0.40960339 -1.13418646]
update hyper-parameter sigma to
[[ 2.79359455e+00 -1.38202939e+00 -1.85833986e-02  4.54262893e-01
  -1.15006561e+00 -5.30676747e-01 -8.28301276e-01 -9.93833418e-02
   9.97343713e-01 -2.06964320e-01  1.67634191e+00 -3.53062642e-01
  -1.76113104e-01  4.42439551e-01 -1.27910471e+00]
 [-1.38202939e+00  2.83262595e+00  6.05230669e-02 -2.70769334e-01
   9.00979949e-01  5.08537432e-01  2.14771512e-01 -3.45310709e-02
  -8.19578696e-01  1.82477625e-01 -1.81195109e+00  1.22583793e-01
  -2.66918974e-01 -9.91796072e-04  8.16452126e-01]
 [-1.85833986e-02  6.05230669e-02  8.25979798e-02 -4.22181098e-02
  -5.04307264e-03  9.44347786e-03  4.87553541e-03 -2.16565440e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 48 finished in 0.253432 and 0.000555038 seconds respectively with log likelihood nan
update hyper-parameter mu to [-1.90997799  0.60093391  0.09876691  0.91009454 -0.54286361 -1.21453525
  0.14422736  0.11333985 -0.25020644  0.00916267 -0.13631577 -0.38821529
 -0.97295526 -0.41720154 -1.15308992]
update hyper-parameter sigma to
[[ 2.88001665e+00 -1.40151232e+00 -1.68273656e-02  4.52027913e-01
  -1.17118580e+00 -5.41255673e-01 -8.21568557e-01 -9.84474571e-02
   1.01059327e+00 -2.09237059e-01  1.70478610e+00 -3.55714374e-01
  -1.69398387e-01  4.51551283e-01 -1.29542374e+00]
 [-1.40151232e+00  2.83269016e+00  5.95605696e-02 -2.74304113e-01
   8.99211413e-01  5.10761554e-01  2.03717906e-01 -3.52725428e-02
  -8.19184831e-01  1.82543318e-01 -1.80807808e+00  1.25754230e-01
  -2.68233663e-01  9.80866622e-04  8.19202981e-01]
 [-1.68273656e-02  5.95605696e-02  8.15356688e-02 -4.13141638e-02
  -6.78719035e-03  8.78242309e-03  3.25418987e-03 -2.38353583e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 49 finished in 0.258397 and 0.000478268 seconds respectively with log likelihood nan
update hyper-parameter mu to [-1.96241824  0.59284573  0.09728396  0.9153697  -0.55246023 -1.22586813
  0.14850322  0.11195552 -0.25091048  0.01258692 -0.13933573 -0.39859721
 -0.97976468 -0.42452758 -1.17429317]
update hyper-parameter sigma to
[[ 2.95888670e+00 -1.42118912e+00 -1.48700647e-02  4.44943065e-01
  -1.18777952e+00 -5.50188255e-01 -8.07480435e-01 -9.63393105e-02
   1.01945636e+00 -2.10933954e-01  1.72825485e+00 -3.57620398e-01
  -1.58859512e-01  4.57375614e-01 -1.30132553e+00]
 [-1.42118912e+00  2.83414776e+00  5.85254295e-02 -2.77790563e-01
   8.96569672e-01  5.12954446e-01  1.91208347e-01 -3.62075898e-02
  -8.18616274e-01  1.82823763e-01 -1.80319200e+00  1.30332682e-01
  -2.69988217e-01  3.41066058e-03  8.20828740e-01]
 [-1.48700647e-02  5.85254295e-02  8.04638053e-02 -4.02595717e-02
  -8.56144186e-03  8.08664732e-03  1.56330588e-03 -2.61100694e-

  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)
  numpy.exp(log_phi) * log_phi * term_counts)


[[nan]] nan
e_step and m_step of iteration 50 finished in 0.260164 and 0.000482082 seconds respectively with log likelihood nan
update hyper-parameter mu to [-2.01139391  0.58317858  0.09545674  0.92444535 -0.56215906 -1.23766784
  0.15254806  0.11024599 -0.25016808  0.01544997 -0.14190671 -0.41099179
 -0.98753846 -0.43144622 -1.20029562]
update hyper-parameter sigma to
[[ 3.02427903e+00 -1.44089274e+00 -1.25332282e-02  4.29736777e-01
  -1.19716958e+00 -5.56304397e-01 -7.81484180e-01 -9.24179301e-02
   1.02094675e+00 -2.11764779e-01  1.74317900e+00 -3.58396019e-01
  -1.41914610e-01  4.57824757e-01 -1.28963832e+00]
 [-1.44089274e+00  2.83791263e+00  5.74098521e-02 -2.81475183e-01
   8.92435759e-01  5.15052620e-01  1.75959356e-01 -3.74761749e-02
  -8.17679868e-01  1.83416844e-01 -1.79649412e+00  1.37324288e-01
  -2.72481640e-01  6.73593675e-03  8.20865519e-01]
 [-1.25332282e-02  5.74098521e-02  7.93658932e-02 -3.89541244e-02
  -1.04633455e-02  7.31385098e-03 -3.47982274e-04 -2.87139270e-

In [27]:
# Run inference with the fitted CTM on `all_words`. The call returns three
# values; presumably the log-likelihood and the variational parameters
# (lambda, nu) — TODO confirm against pyctm.
logl, lamda, nu = ctm_inferencer.inference(all_words)

# Per-topic word weights; the next cell shows this as a list of
# {word: weight} dicts, one dict per topic.
ll = utils.topic_beta(ctm_inferencer)

# Tabular view of the same data: one row per dict, one column per word.
topic_words = pd.DataFrame(ll)

successfully parse 10 documents...


In [32]:
# Display the raw per-topic word weights returned by utils.topic_beta.
ll

[{'languag': 0.9550024041822259,
  'email': 0.04499759581777409,
  'hour': 0.0,
  'area': 0.0,
  'teach': 0.0,
  'student': 0.0,
  'design': 0.0,
  'program': 0.0,
  'list': 0.0,
  'time': 0.0,
  'project': 0.0,
  'distribut': 0.0,
  'fall': 0.0,
  'current': 0.0,
  'base': 0.0,
  'softwar': 0.0,
  'mail': 0.0,
  'algorithm': 0.0,
  'link': 0.0,
  'develop': 0.0,
  'gener': 0.0,
  'group': 0.0,
  'web': 0.0,
  'updat': 0.0,
  'includ': 0.0,
  'model': 0.0,
  'inform': 0.0,
  'interest': 0.0,
  'data': 0.0,
  'professor': 0.0,
  'offic': 0.0,
  'applic': 0.0,
  'schedul': 0.0,
  'parallel': 0.0,
  'home': 0.0,
  'graduat': 0.0,
  'engin': 0.0,
  'fax': 0.0,
  'process': 0.0,
  'technolog': 0.0,
  'network': 0.0,
  'assist': 0.0,
  'class': 0.0,
  'work': 0.0,
  'problem': 0.0,
  'system': 0.0,
  'public': 0.0,
  'phone': 0.0,
  'paper': 0.0,
  'address': 0.0},
 {'offic': 0.17683265141400326,
  'mail': 0.16961588788988885,
  'program': 0.16961588788988885,
  'gener': 0.1388697436260792,


In [28]:
# Summarise each topic by its three highest-weighted words: print the words
# with their weights and keep the space-joined words as a short topic label.
topic_str_repr = []
for _, weights in topic_words.iterrows():
    top_three = weights.sort_values(ascending=False)[:3]
    print(pd.DataFrame(top_three).T)
    topic_str_repr.append(' '.join(top_three.index))

    languag     email  inform
0  0.955002  0.044998     0.0
      offic      mail   program
1  0.176833  0.169616  0.169616
   professor   project  distribut
2   0.641834  0.358166        0.0
       link      home      list
3  0.229816  0.147937  0.107528
   student  problem  work
4      0.5      0.5   0.0
   engin  work  email
5    1.0   0.0    0.0
      paper     email  work
6  0.528642  0.471358   0.0
   phone  work  email
7    1.0   0.0    0.0
   inform    work  email
8  0.9304  0.0696    0.0
   offic  work  email
9    1.0   0.0    0.0
    parallel     model    public
10   0.20256  0.175595  0.118599
         web  distribut  email
11  0.809998   0.190002    0.0
       group   graduat   softwar
12  0.475367  0.174878  0.174878
      assist     updat  work
13  0.697059  0.302941   0.0
         fax   address  email
14  0.988735  0.011265    0.0


In [29]:
# Column labels taken from the values of `dct` (presumably the vocabulary
# tokens of a gensim Dictionary — TODO confirm).
word_dct_values = list(dct.values())

# Wrap ctm_model.beta in a DataFrame labelled with those values, then reorder
# the columns so that the labels are in sorted order.
dff = pd.DataFrame(ctm_model.beta, columns=word_dct_values)
dff = dff[sorted(dff.columns)]

In [34]:
# Display the column-sorted table built from ctm_model.beta.
dff

Unnamed: 0,address,algorithm,applic,area,assist,base,class,current,data,design,...,schedul,softwar,student,system,teach,technolog,time,updat,web,work
0,4.297508e-28,4.442288e-13,1.594608e-58,0.0,3.6268890000000003e-28,3.9077e-35,9.228855e-59,4.035642e-59,1.331595e-43,2.3078139999999997e-34,...,3.660534e-34,6.221078e-61,1.361251e-23,9.341098e-13,2.62084e-58,2.3502420000000003e-43,0.0,3.4006040000000003e-43,7.610733e-16,4.908232e-58
1,1.0860650000000001e-27,3.042558e-13,1.112333e-57,0.0,6.809378e-28,3.429169e-34,1.286617e-57,8.110761e-59,6.945123999999999e-44,9.500631e-34,...,1.088641e-34,1.2725819999999999e-57,1.286415e-22,2.524302e-12,5.519617e-58,2.224009e-43,0.0,6.674937e-43,1.9785090000000003e-17,5.927602e-58
2,1.8841140000000002e-28,7.210961e-14,6.429234e-58,0.0,2.121613e-28,4.698312e-34,3.240783e-58,1.763183e-58,4.682648e-43,4.425781e-34,...,3.5731629999999997e-34,6.573493e-58,4.4876960000000004e-23,8.529095e-13,2.662254e-58,4.034828e-43,0.0,6.8445770000000006e-43,3.584997e-16,4.153564e-58
3,1.201195e-27,1.189983e-12,7.382704e-58,0.0,6.240019e-28,5.481158e-34,6.340583e-59,2.364767e-58,3.685738e-43,5.534289e-35,...,1.114838e-34,3.1940849999999996e-58,2.0690840000000002e-23,8.630395e-13,1.684885e-59,6.503306e-43,0.0,2.929934e-43,4.061334e-16,9.278982e-58
4,1.8406260000000002e-27,2.94164e-12,1.475254e-57,0.0,2.624923e-27,1.7559810000000002e-33,1.487372e-57,8.136686e-58,2.340282e-42,9.507488999999999e-34,...,1.353739e-33,1.882511e-57,4.0656420000000003e-23,3.575254e-12,3.412444e-58,1.130207e-42,0.0,2.6409229999999998e-42,1.682826e-15,6.666696e-58
5,2.6557210000000003e-27,2.671455e-12,4.089628e-57,0.0,5.166151e-27,2.2672590000000003e-33,3.2473489999999997e-57,1.101824e-57,6.065485999999999e-44,1.131217e-33,...,2.906701e-33,2.795796e-57,2.932483e-22,5.224053e-12,2.955734e-57,2.9806499999999997e-42,0.0,4.0665379999999997e-42,5.811925e-15,2.940288e-57
6,6.479323e-28,3.999573e-13,4.822469e-58,0.0,2.1215220000000002e-28,3.3358809999999997e-34,3.283193e-58,4.474074e-58,2.2334280000000002e-43,3.72929e-35,...,3.571652e-35,3.631423e-58,2.2001420000000003e-23,3.827787e-13,2.802645e-58,5.016529e-43,0.0,7.368146e-45,8.034731e-16,2.6512349999999998e-58
7,5.5164420000000005e-28,3.455159e-13,1.136104e-58,0.0,6.021776e-28,7.121135e-35,2.856959e-59,3.877539e-58,5.281742e-43,8.267019e-35,...,2.010455e-34,5.300393e-58,4.665412e-23,7.206113e-13,2.08005e-58,2.5436000000000002e-43,0.0,9.684001e-44,6.724321e-16,2.996886e-58
8,3.3566660000000003e-28,3.811897e-13,3.794479e-58,0.0,1.54847e-28,2.101832e-34,2.151644e-58,4.446637e-58,1.49333e-43,2.508354e-34,...,2.5576079999999998e-34,4.5018899999999996e-58,1.596453e-23,6.388451e-13,2.055806e-58,6.343626e-43,0.0,7.492647999999999e-44,8.258365e-16,1.575674e-58
9,3.2636190000000003e-28,9.784919e-14,6.648659e-58,0.0,3.233717e-28,1.247186e-35,2.209099e-58,2.242135e-58,8.335059e-43,2.75369e-34,...,2.748975e-34,7.045917e-58,1.735251e-23,1.455828e-13,5.810166e-58,4.175769e-44,0.0,3.7189859999999997e-44,8.432158e-16,6.048442e-60


In [30]:
# from gensim import models
# model = models.LdaModel(corpus, id2word=dct, num_topics=num_terms)

In [31]:
# Densify the output of `model[corpus]` into a documents x terms array
# (corpus2dense puts documents in columns, hence the transpose) and wrap it
# in a DataFrame labelled with the values of `dct` and indexed by `y`.
# NOTE(review): the recorded run of this cell fails with
#   ValueError: Shape of passed values is (10, 50), indices imply (2785, 50)
# i.e. `corpus` yields 10 rows while `y` holds 2785 labels. `corpus` and `y`
# must describe the same documents before this cell can succeed.
# NOTE(review): the only definition of `model` visible nearby is commented out
# in the preceding cell — confirm where `model` is bound.
import gensim
XX = gensim.matutils.corpus2dense(model[corpus], num_terms=num_terms).T
cols = list(dct.values())
df = pd.DataFrame(XX, columns=cols, index=y)
# df[words_to_keep].T.sort_index().T

# X = df[words_to_keep].values
# Rebind the notebook-wide X / y to the dense values and their index.
X = df.values
y = df.index

ValueError: Shape of passed values is (10, 50), indices imply (2785, 50)

In [None]:
from regain.covariance import kernel_time_graphical_lasso_
from regain.model_selection import stability_optimization
from sklearn.model_selection import StratifiedShuffleSplit

# Fit a kernel time graphical lasso with an all-ones kernel whose side length
# is the number of distinct labels in `y`.
n_labels = np.unique(y).size
ones_kernel = np.ones((n_labels, n_labels))
mdl = kernel_time_graphical_lasso_.KernelTimeGraphicalLasso(
    verbose=0,
    kernel=ones_kernel,
    psi='l1',
    alpha=0.45,
    max_iter=1000,
).fit(X, y)

In [None]:
# Stability-based selection of `alpha` for `mdl`: candidates are log-spaced
# from 1e2 down to 1e-2 (numpy's default number of samples), evaluated over
# 100 stratified shuffle splits.
alpha_grid = {'alpha': np.logspace(2, -2)}
splitter = StratifiedShuffleSplit(100)
socv = stability_optimization.GraphicalModelStabilitySelection(
    mdl, param_grid=alpha_grid, cv=splitter).fit(X, y)

In [None]:
# Number of distinct labels in `y`, and number of columns in `X`.
n_times = np.unique(y).size
n_dim = X.shape[1]

In [None]:
# Index pairs of the strictly upper triangle (offset 1, so the diagonal is
# excluded) of an n_dim x n_dim matrix.
idx = np.triu_indices(n_dim, 1)
# Number of such off-diagonal entries across n_times matrices; used later as
# the denominator of the non-zero ratio.
dof = idx[0].size * n_times

In [None]:
from regain import utils
# Saving the fitted selection object is currently disabled:
# utils.save_pickle(socv, "socv")

# Reload a saved selection object, replacing whatever `socv` currently holds.
# NOTE(review): the disabled save call uses "socv" while this reads
# "socv.pkl" — presumably save_pickle appends the extension; confirm.
# NOTE(review): if load_pickle wraps the pickle module, loading can execute
# arbitrary code, so only load files created locally.
socv = utils.load_pickle("socv.pkl")

In [None]:
# Continue with the estimator that the selection object stores as its best.
mdl = socv.best_estimator_

In [None]:
# Count the non-zero strictly-upper-triangular entries over all estimated
# precision matrices and report them relative to `dof`. The printed value is
# a ratio (it is not multiplied by 100) despite the "percentage" label.
nonzero_entries = np.sum([np.count_nonzero(P[idx]) for P in mdl.precision_])
print("Nonzero percentage: %.4f" % (nonzero_entries / dof))

In [None]:
import networkx as nx
import matplotlib.pyplot as plt
from kdge import plot_plotly
import plotly.offline as py
import plotly.graph_objs as go
from plotly import tools

# Set up plotly's offline mode for inline rendering in this notebook.
py.init_notebook_mode()
import plotly.io as pio

In [None]:
# First of the estimated precision matrices.
p = mdl.precision_[0]

In [None]:
from regain.utils import retain_top_n

In [None]:
# Build one circular graph per estimated precision matrix and arrange the
# graphs side by side in a single plotly figure.
# NOTE(review): `top_n` and `pl` are not bound in any of the nearby cells (the
# import cell binds `plot_plotly`, not `pl`) — confirm they are defined
# earlier in the notebook.
# NOTE(review): `nx.from_numpy_matrix` was removed in networkx 3.0, so this
# cell needs an older networkx as written.
k = -1  # position (last) of the trace in each sub-figure that gets re-pointed
trace = []
graphs = []
# Alternative input: ltgl.precision_ - ltgl.latent_
for p in mdl.precision_:
    # Absolute off-diagonal entries only, then filtered by retain_top_n.
    off_diagonal = p - np.diag(np.diag(p))
    A = retain_top_n(np.abs(off_diagonal), top_n)
    G = nx.from_numpy_matrix(A * 0.00001)
    graphs.append(G)
    # Optional, currently unused argument:
    # color_nodes=plt.rcParams['axes.prop_cycle'].by_key()['color'][:5]
    trace.append(pl.plot_circular(G, df.columns, 1.4, cmap='Blues'))

n_graphs = len(mdl.precision_)
fig = tools.make_subplots(
    rows=1, cols=n_graphs, horizontal_spacing=.1, print_grid=False)

# Point the k-th trace of every sub-figure at the axes of its own column.
for col, tr in enumerate(trace, start=1):
    tr['data'][k]['xaxis'] = 'x%d' % col
    tr['data'][k]['yaxis'] = 'y%d' % col

# Move every trace into row 1 of its column, with legends hidden.
for col, tr in enumerate(trace, start=1):
    for x in tr['data']:
        x['legendgroup'] = 'group%d' % col
        x['showlegend'] = False
        fig.append_trace(x, 1, col)

# Carry the annotations over to the matching axes and hide grid, zero line
# and tick labels on each subplot.
hidden_axis = dict(showgrid=False, zeroline=False, showticklabels=False)
for col, tr in enumerate(trace, start=1):
    x_ref = 'x%d' % col
    y_ref = 'y%d' % col
    fig.layout.annotations += tuple(
        pl._set_ref(x, x_ref, y_ref) for x in tr['layout']['annotations'])

    fig['layout']['xaxis%d' % col].update(**hidden_axis)
    fig['layout']['yaxis%d' % col].update(**hidden_axis)

py.init_notebook_mode()

transparent = 'rgba(0,0,0,0)'
fig['layout'].update(
    height=900, width=4000, hovermode='closest',
    paper_bgcolor=transparent, plot_bgcolor=transparent)
py.iplot(fig)

In [None]:
# Export the current figure to graphs.pdf in the working directory.
pio.write_image(fig, "graphs.pdf")

In [None]:
import pyctm

from pyctm import variational_bayes, inferencer

# Hyper-parameters ("parameter set 3") passed to the CTM initialiser.
alpha_mu, alpha_sigma, alpha_beta = 0., .1, -.1

# Set up the variational-Bayes CTM on the training documents and vocabulary.
# The literal 20 is presumably the number of topics — TODO confirm.
ctm_inferencer = variational_bayes.VariationalBayes()
ctm_inferencer._initialize(
    train.words.tolist(), words, 20, alpha_mu, alpha_sigma, alpha_beta)

# 50 learning passes.
for _ in range(50):
    ctm_inferencer.learning(-1)

# Inference on the same training documents.
logl, lamda, nu = ctm_inferencer.inference(train.words.tolist())

# NOTE(review): `utils` must be the module that provides topic_beta when this
# runs; `from regain import utils` appears in an earlier cell and
# `from pyctm import utils` only in a later one — confirm the execution order.
ll = utils.topic_beta(ctm_inferencer)

# One row per entry of `ll`.
topic_words = pd.DataFrame(ll)

In [None]:
# Sum of the absolute values of all entries of lamda.
np.abs(lamda).sum()

In [None]:
from pyctm import utils

In [None]:
import numpy as np
from sklearn.covariance import GraphicalLassoCV

from regain import utils

In [None]:
# Write lamda to lambda_ctm.pkl, then rebind lamda to the value read back
# from that same file.
utils.save_pickle(lamda, "lambda_ctm.pkl")
lamda = utils.load_pickle('lambda_ctm.pkl')

In [None]:
# (rows, columns) of the dff table.
dff.values.shape

In [None]:
# Fit a cross-validated graphical lasso on lamda (rows are treated as samples,
# columns as variables; presumably documents x topics — TODO confirm).
gl = GraphicalLassoCV().fit(lamda)

In [None]:
# Draw the graph implied by the graphical-lasso precision matrix as a single
# circular plot whose nodes are labelled with the topic summaries.
# NOTE(review): `pl` is not bound in any of the nearby cells — confirm it is
# defined earlier in the notebook. `nx.from_numpy_matrix` was removed in
# networkx 3.0.
p = gl.precision_

# Absolute off-diagonal entries only, filtered by retain_top_n with n=20 and
# scaled by 3 before building the graph.
off_diagonal = p - np.diag(np.diag(p))
A = retain_top_n(np.abs(off_diagonal), 20)
G = nx.from_numpy_matrix(A * 3)
fig = pl.plot_circular(G, topic_str_repr, 2, cmap='Blues')

transparent = 'rgba(0,0,0,0)'
fig['layout'].update(
    height=800, width=800, hovermode='closest',
    paper_bgcolor=transparent, plot_bgcolor=transparent)
py.iplot(fig)