In [1]:
%matplotlib inline

# Standard imports
import os
import string
import sys

In [2]:
# Setup path
sys.path.append("../")
sys.path.append("../contraxsuite_services/")

In [3]:
# Django imports
import django
from django.db import IntegrityError

# Setup django environment
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings.local")
django.setup()

In [10]:
# Import document model
import nltk
from contraxsuite_services.apps.document import *
from contraxsuite_services.apps.task.utils.nlp import *
from contraxsuite_services.apps.task.utils.text import *
from contraxsuite_services.apps.task.tasks import normalize, stem_tokens, stemmer

In [5]:
# Data science imports
import gensim
import gensim.corpora
import gensim.models
import gensim.models.word2vec

2017-08-01 18:56:54,898 [MainThread  ] [INFO ]  'pattern' package not found; tag filters are not available for English


## Generate sentence sample list

In [15]:
# Build training sample
sentence_list = []

# Iterate and print
for d in Document.objects.all()[0:50]:
    for tu in d.textunit_set.filter(unit_type="sentence").order_by("id").all():
        sentence_list.append(["_".join(l) for l in list(nltk.ngrams(normalize(tu.text), 2))])

## Build LDA model

In [16]:
# Build bigram model
bigram_corpus_lt = gensim.corpora.Dictionary(sentence_list)
bigram_corpus_bow = [bigram_corpus_lt.doc2bow(t) for t in sentence_list]

2017-08-01 18:58:57,384 [MainThread  ] [INFO ]  adding document #0 to Dictionary(0 unique tokens: [])
2017-08-01 18:58:57,790 [MainThread  ] [INFO ]  adding document #10000 to Dictionary(51702 unique tokens: ['document_and', 'chemic_with', 'of_incorpor', 'plan_on', 'deem_member']...)
2017-08-01 18:58:57,956 [MainThread  ] [INFO ]  built Dictionary(67894 unique tokens: ['document_and', 'chemic_with', 'of_incorpor', 'plan_on', 'deem_member']...) from 14126 documents (total 323328 corpus positions)


In [18]:
ldamodel = gensim.models.ldamodel.LdaModel(bigram_corpus_bow, num_topics=10,
                                           id2word = bigram_corpus_lt,
                                           passes=10)

2017-08-01 19:02:00,786 [MainThread  ] [INFO ]  using symmetric alpha at 0.1
2017-08-01 19:02:00,788 [MainThread  ] [INFO ]  using symmetric eta at 1.472884201844051e-05
2017-08-01 19:02:00,802 [MainThread  ] [INFO ]  using serial LDA version on this node
2017-08-01 19:02:08,290 [MainThread  ] [INFO ]  running online (multi-pass) LDA training, 10 topics, 10 passes over the supplied corpus of 14126 documents, updating model once every 2000 documents, evaluating perplexity every 14126 documents, iterating 50x with a convergence threshold of 0.001000
2017-08-01 19:02:08,292 [MainThread  ] [INFO ]  PROGRESS: pass 0, at document #2000/14126
2017-08-01 19:02:10,982 [MainThread  ] [INFO ]  merging changes from 2000 documents into a model of 14126 documents
2017-08-01 19:02:11,272 [MainThread  ] [INFO ]  topic #0 (0.100): 0.013*"se_americom" + 0.010*"thi_agreement" + 0.007*"of_the" + 0.004*"for_the" + 0.004*"shall_be" + 0.004*"the_servic" + 0.004*"with_the" + 0.004*"of_thi" + 0.003*"in_the" + 

2017-08-01 19:02:22,541 [MainThread  ] [INFO ]  topic diff=0.987491, rho=0.447214
2017-08-01 19:02:22,542 [MainThread  ] [INFO ]  PROGRESS: pass 0, at document #12000/14126
2017-08-01 19:02:24,145 [MainThread  ] [INFO ]  merging changes from 2000 documents into a model of 14126 documents
2017-08-01 19:02:24,929 [MainThread  ] [INFO ]  topic #9 (0.100): 0.007*"of_the" + 0.005*"or_other" + 0.005*"of_ani" + 0.004*"in_the" + 0.004*"shall_be" + 0.004*"or_ani" + 0.004*"to_the" + 0.004*"ani_of" + 0.003*"in_thi" + 0.003*"by_ani"
2017-08-01 19:02:24,931 [MainThread  ] [INFO ]  topic #3 (0.100): 0.010*"of_the" + 0.008*"to_the" + 0.007*"shall_be" + 0.005*"in_the" + 0.003*"with_the" + 0.003*"the_softwar" + 0.003*"requir_to" + 0.003*"law_of" + 0.003*"to_be" + 0.003*"the_law"
2017-08-01 19:02:24,933 [MainThread  ] [INFO ]  topic #4 (0.100): 0.010*"of_the" + 0.007*"tenant_shall" + 0.007*"thi_leas" + 0.007*"the_leas" + 0.006*"leas_premis" + 0.006*"the_premis" + 0.005*"to_the" + 0.004*"to_landlord" + 0

2017-08-01 19:02:33,194 [MainThread  ] [INFO ]  merging changes from 2000 documents into a model of 14126 documents
2017-08-01 19:02:33,855 [MainThread  ] [INFO ]  topic #8 (0.100): 0.015*"of_the" + 0.007*"to_the" + 0.006*"of_such" + 0.006*"of_ani" + 0.005*"of_it" + 0.005*"thi_agreement" + 0.004*"shall_be" + 0.004*"in_the" + 0.004*"shall_not" + 0.003*"ani_such"
2017-08-01 19:02:33,856 [MainThread  ] [INFO ]  topic #9 (0.100): 0.007*"of_the" + 0.006*"by_ani" + 0.005*"in_connect" + 0.005*"in_the" + 0.005*"or_other" + 0.005*"of_ani" + 0.004*"connect_with" + 0.004*"ani_of" + 0.004*"or_ani" + 0.003*"in_thi"
2017-08-01 19:02:33,858 [MainThread  ] [INFO ]  topic #6 (0.100): 0.009*"to_the" + 0.009*"of_the" + 0.008*"thi_agreement" + 0.006*"the_parti" + 0.004*"with_respect" + 0.004*"respect_to" + 0.004*"the_issuer" + 0.003*"as_of" + 0.003*"and_to" + 0.003*"of_thi"
2017-08-01 19:02:33,860 [MainThread  ] [INFO ]  topic #1 (0.100): 0.014*"the_secur" + 0.012*"of_the" + 0.009*"portion_of" + 0.007*"co

2017-08-01 19:02:42,795 [MainThread  ] [INFO ]  topic #9 (0.100): 0.011*"by_ani" + 0.011*"credit_parti" + 0.009*"in_connect" + 0.007*"in_the" + 0.007*"or_other" + 0.006*"of_the" + 0.006*"connect_with" + 0.005*"ani_subsidiari" + 0.005*"of_ani" + 0.004*"debt_includ"
2017-08-01 19:02:42,797 [MainThread  ] [INFO ]  topic #5 (0.100): 0.013*"of_the" + 0.006*"fiscal_year" + 0.005*"for_the" + 0.004*"permit_ani" + 0.004*"to_the" + 0.004*"abov_are" + 0.004*"not_guarante" + 0.004*"are_use" + 0.004*"the_compani" + 0.004*"the_form"
2017-08-01 19:02:42,799 [MainThread  ] [INFO ]  topic #2 (0.100): 0.008*"of_permit" + 0.007*"of_the" + 0.006*"2_such" + 0.006*"and_each" + 0.005*"cours_of" + 0.005*"of_thi" + 0.005*"exhibit_a" + 0.005*"the_undersign" + 0.005*"to_the" + 0.005*"permit_debt"
2017-08-01 19:02:42,801 [MainThread  ] [INFO ]  topic #8 (0.100): 0.018*"of_the" + 0.009*"of_such" + 0.007*"of_ani" + 0.007*"credit_parti" + 0.006*"of_it" + 0.006*"to_the" + 0.006*"and_expens" + 0.005*"in_the" + 0.005*"

2017-08-01 19:02:51,144 [MainThread  ] [INFO ]  topic #1 (0.100): 0.010*"the_secur" + 0.008*"of_the" + 0.006*"portion_of" + 0.006*"with_the" + 0.006*"copi_of" + 0.005*"confidenti_treatment" + 0.005*"under_the" + 0.005*"secur_and" + 0.005*"pursuant_to" + 0.004*"to_a"
2017-08-01 19:02:51,146 [MainThread  ] [INFO ]  topic #2 (0.100): 0.007*"of_the" + 0.005*"thi_agreement" + 0.005*"shall_be" + 0.005*"of_thi" + 0.004*"to_the" + 0.004*"the_softwar" + 0.004*"exhibit_a" + 0.003*"licens_fee" + 0.003*"the_parti" + 0.003*"the_licens"
2017-08-01 19:02:51,148 [MainThread  ] [INFO ]  topic diff=0.254678, rho=0.315236
2017-08-01 19:02:51,149 [MainThread  ] [INFO ]  PROGRESS: pass 2, at document #12000/14126
2017-08-01 19:02:52,189 [MainThread  ] [INFO ]  merging changes from 2000 documents into a model of 14126 documents
2017-08-01 19:02:52,823 [MainThread  ] [INFO ]  topic #8 (0.100): 0.015*"of_the" + 0.008*"of_ani" + 0.007*"to_the" + 0.006*"of_such" + 0.005*"thi_agreement" + 0.005*"of_it" + 0.005*"

2017-08-01 19:02:58,881 [MainThread  ] [INFO ]  topic #3 (0.100): 0.008*"of_the" + 0.007*"se_americom" + 0.006*"to_the" + 0.006*"by_s" + 0.005*"shall_be" + 0.005*"the_facil" + 0.005*"in_the" + 0.004*"the_state" + 0.004*"with_the" + 0.004*"accord_with"
2017-08-01 19:02:58,883 [MainThread  ] [INFO ]  topic diff=0.234870, rho=0.300652
2017-08-01 19:02:58,884 [MainThread  ] [INFO ]  PROGRESS: pass 3, at document #6000/14126
2017-08-01 19:03:00,026 [MainThread  ] [INFO ]  merging changes from 2000 documents into a model of 14126 documents
2017-08-01 19:03:00,552 [MainThread  ] [INFO ]  topic #9 (0.100): 0.007*"by_ani" + 0.007*"credit_parti" + 0.006*"in_the" + 0.006*"in_connect" + 0.005*"or_other" + 0.005*"of_the" + 0.004*"connect_with" + 0.004*"of_ani" + 0.003*"to_the" + 0.003*"as_a"
2017-08-01 19:03:00,554 [MainThread  ] [INFO ]  topic #4 (0.100): 0.013*"of_the" + 0.008*"thi_leas" + 0.007*"the_premis" + 0.005*"tenant_shall" + 0.005*"to_the" + 0.005*"shall_be" + 0.005*"such_debt" + 0.003*"b

2017-08-01 19:03:07,766 [MainThread  ] [INFO ]  PROGRESS: pass 3, at document #14126/14126
2017-08-01 19:03:07,828 [MainThread  ] [INFO ]  merging changes from 126 documents into a model of 14126 documents
2017-08-01 19:03:08,529 [MainThread  ] [INFO ]  topic #9 (0.100): 0.015*"credit_parti" + 0.011*"such_debt" + 0.010*"by_ani" + 0.008*"in_connect" + 0.008*"in_the" + 0.006*"or_other" + 0.005*"ani_subsidiari" + 0.005*"connect_with" + 0.005*"permit_acquisit" + 0.005*"a_permit"
2017-08-01 19:03:08,531 [MainThread  ] [INFO ]  topic #2 (0.100): 0.008*"of_permit" + 0.007*"of_the" + 0.007*"permit_debt" + 0.006*"and_each" + 0.006*"2_such" + 0.005*"exhibit_a" + 0.005*"the_undersign" + 0.005*"shall_be" + 0.004*"of_thi" + 0.004*"item_of"
2017-08-01 19:03:08,532 [MainThread  ] [INFO ]  topic #1 (0.100): 0.015*"david_j" + 0.015*"j_clark" + 0.011*"expens_in" + 0.009*"athyrium_opportun" + 0.008*"s_david" + 0.008*"llc_gener" + 0.008*"je_flynn" + 0.008*"flynn_capit" + 0.008*"capit_llc" + 0.008*"by_je"


2017-08-01 19:03:16,276 [MainThread  ] [INFO ]  topic #6 (0.100): 0.012*"of_the" + 0.010*"to_the" + 0.009*"thi_agreement" + 0.006*"with_respect" + 0.006*"respect_to" + 0.006*"the_parti" + 0.004*"the_licens" + 0.004*"the_support" + 0.004*"enter_into" + 0.004*"and_the"
2017-08-01 19:03:16,277 [MainThread  ] [INFO ]  topic #2 (0.100): 0.006*"of_the" + 0.005*"thi_agreement" + 0.005*"shall_be" + 0.004*"the_softwar" + 0.004*"to_the" + 0.004*"of_thi" + 0.004*"ole_chemic" + 0.004*"exhibit_a" + 0.003*"the_parti" + 0.003*"licens_fee"
2017-08-01 19:03:16,279 [MainThread  ] [INFO ]  topic #8 (0.100): 0.017*"of_the" + 0.008*"of_ani" + 0.006*"to_the" + 0.006*"of_such" + 0.005*"thi_agreement" + 0.005*"of_it" + 0.005*"shall_not" + 0.004*"the_other" + 0.004*"ani_such" + 0.004*"ani_of"
2017-08-01 19:03:16,281 [MainThread  ] [INFO ]  topic #9 (0.100): 0.006*"by_ani" + 0.005*"or_other" + 0.005*"in_the" + 0.005*"in_connect" + 0.005*"of_ani" + 0.004*"of_the" + 0.004*"credit_parti" + 0.003*"connect_with" + 0

2017-08-01 19:03:23,484 [MainThread  ] [INFO ]  topic #1 (0.100): 0.015*"the_secur" + 0.013*"of_the" + 0.013*"portion_of" + 0.010*"confidenti_treatment" + 0.009*"secur_and" + 0.009*"pursuant_to" + 0.008*"with_the" + 0.008*"to_a" + 0.007*"and_exchang" + 0.007*"exchang_commiss"
2017-08-01 19:03:23,486 [MainThread  ] [INFO ]  topic #9 (0.100): 0.011*"such_debt" + 0.010*"credit_parti" + 0.008*"by_ani" + 0.007*"in_the" + 0.006*"in_connect" + 0.005*"or_other" + 0.004*"connect_with" + 0.004*"of_ani" + 0.004*"of_the" + 0.004*"to_the"
2017-08-01 19:03:23,488 [MainThread  ] [INFO ]  topic #5 (0.100): 0.010*"of_the" + 0.006*"for_the" + 0.005*"se_americom" + 0.004*"to_the" + 0.003*"the_initi" + 0.003*"extend_term" + 0.003*"shall_be" + 0.003*"the_softwar" + 0.003*"the_form" + 0.003*"fiscal_year"
2017-08-01 19:03:23,490 [MainThread  ] [INFO ]  topic diff=0.187527, rho=0.276680
2017-08-01 19:03:23,491 [MainThread  ] [INFO ]  PROGRESS: pass 5, at document #6000/14126
2017-08-01 19:03:24,594 [MainThrea

2017-08-01 19:03:31,553 [MainThread  ] [INFO ]  topic #3 (0.100): 0.008*"of_the" + 0.007*"shall_be" + 0.006*"to_the" + 0.004*"with_the" + 0.004*"in_the" + 0.004*"accord_with" + 0.004*"in_accord" + 0.003*"by_s" + 0.003*"law_of" + 0.003*"the_parti"
2017-08-01 19:03:31,555 [MainThread  ] [INFO ]  topic #8 (0.100): 0.017*"of_the" + 0.008*"of_ani" + 0.006*"to_the" + 0.006*"thi_agreement" + 0.006*"of_such" + 0.005*"shall_not" + 0.005*"of_it" + 0.005*"the_other" + 0.004*"ani_of" + 0.004*"ani_such"
2017-08-01 19:03:31,558 [MainThread  ] [INFO ]  topic diff=0.115595, rho=0.276680
2017-08-01 19:03:32,021 [MainThread  ] [INFO ]  -9.392 per-word bound, 671.9 perplexity estimate based on a held-out corpus of 126 documents with 3393 words
2017-08-01 19:03:32,022 [MainThread  ] [INFO ]  PROGRESS: pass 5, at document #14126/14126
2017-08-01 19:03:32,083 [MainThread  ] [INFO ]  merging changes from 126 documents into a model of 14126 documents
2017-08-01 19:03:32,769 [MainThread  ] [INFO ]  topic #8 (0

2017-08-01 19:03:38,754 [MainThread  ] [INFO ]  topic #2 (0.100): 0.006*"of_the" + 0.005*"ole_chemic" + 0.005*"shall_be" + 0.005*"thi_agreement" + 0.004*"to_the" + 0.004*"the_softwar" + 0.004*"the_parti" + 0.003*"exhibit_a" + 0.003*"of_thi" + 0.003*"item_of"
2017-08-01 19:03:38,756 [MainThread  ] [INFO ]  topic diff=0.139851, rho=0.266662
2017-08-01 19:03:38,757 [MainThread  ] [INFO ]  PROGRESS: pass 6, at document #10000/14126
2017-08-01 19:03:39,786 [MainThread  ] [INFO ]  merging changes from 2000 documents into a model of 14126 documents
2017-08-01 19:03:40,341 [MainThread  ] [INFO ]  topic #7 (0.100): 0.016*"of_the" + 0.011*"thi_agreement" + 0.010*"the_term" + 0.010*"set_forth" + 0.009*"to_the" + 0.008*"in_the" + 0.008*"of_thi" + 0.007*"term_and" + 0.007*"forth_in" + 0.006*"shall_be"
2017-08-01 19:03:40,343 [MainThread  ] [INFO ]  topic #1 (0.100): 0.009*"the_secur" + 0.008*"of_the" + 0.007*"portion_of" + 0.006*"with_the" + 0.006*"copi_of" + 0.006*"confidenti_treatment" + 0.005*"s

2017-08-01 19:03:46,251 [MainThread  ] [INFO ]  topic diff=0.172294, rho=0.257658
2017-08-01 19:03:46,252 [MainThread  ] [INFO ]  PROGRESS: pass 7, at document #4000/14126
2017-08-01 19:03:47,202 [MainThread  ] [INFO ]  merging changes from 2000 documents into a model of 14126 documents
2017-08-01 19:03:47,621 [MainThread  ] [INFO ]  topic #8 (0.100): 0.020*"of_the" + 0.007*"of_ani" + 0.007*"of_such" + 0.005*"to_the" + 0.005*"of_it" + 0.005*"ani_such" + 0.005*"and_expens" + 0.005*"is_not" + 0.005*"shall_not" + 0.004*"ani_of"
2017-08-01 19:03:47,623 [MainThread  ] [INFO ]  topic #0 (0.100): 0.015*"thi_agreement" + 0.009*"of_thi" + 0.007*"of_the" + 0.005*"shall_be" + 0.005*"the_facil" + 0.005*"the_event" + 0.004*"termin_of" + 0.004*"provis_of" + 0.004*"in_the" + 0.004*"under_thi"
2017-08-01 19:03:47,624 [MainThread  ] [INFO ]  topic #2 (0.100): 0.006*"of_the" + 0.005*"shall_be" + 0.005*"of_permit" + 0.005*"to_the" + 0.004*"thi_agreement" + 0.004*"of_thi" + 0.004*"and_each" + 0.004*"permi

2017-08-01 19:03:55,507 [MainThread  ] [INFO ]  topic #8 (0.100): 0.017*"of_the" + 0.008*"of_ani" + 0.006*"to_the" + 0.006*"thi_agreement" + 0.006*"of_such" + 0.005*"shall_not" + 0.005*"the_other" + 0.005*"of_it" + 0.004*"ani_of" + 0.004*"ani_such"
2017-08-01 19:03:55,508 [MainThread  ] [INFO ]  topic #2 (0.100): 0.006*"of_the" + 0.005*"shall_be" + 0.005*"thi_agreement" + 0.004*"to_the" + 0.004*"and_support" + 0.004*"of_thi" + 0.004*"the_parti" + 0.004*"exhibit_a" + 0.003*"for_the" + 0.003*"licens_fee"
2017-08-01 19:03:55,510 [MainThread  ] [INFO ]  topic #5 (0.100): 0.010*"of_the" + 0.006*"for_the" + 0.006*"the_softwar" + 0.005*"to_the" + 0.004*"busi_procedur" + 0.003*"paid_us" + 0.003*"for_a" + 0.003*"common_stock" + 0.003*"shall_be" + 0.003*"will_be"
2017-08-01 19:03:55,512 [MainThread  ] [INFO ]  topic #6 (0.100): 0.011*"of_the" + 0.010*"to_the" + 0.010*"thi_agreement" + 0.006*"the_parti" + 0.006*"with_respect" + 0.005*"respect_to" + 0.004*"enter_into" + 0.004*"and_the" + 0.003*"ag

2017-08-01 19:04:02,645 [MainThread  ] [INFO ]  topic #0 (0.100): 0.017*"thi_agreement" + 0.009*"of_thi" + 0.007*"of_the" + 0.005*"shall_be" + 0.005*"provis_of" + 0.004*"thi_section" + 0.004*"under_thi" + 0.004*"the_softwar" + 0.004*"in_the" + 0.004*"the_event"
2017-08-01 19:04:02,647 [MainThread  ] [INFO ]  topic #4 (0.100): 0.013*"of_the" + 0.007*"thi_leas" + 0.006*"the_premis" + 0.006*"to_the" + 0.005*"shall_be" + 0.005*"tenant_shall" + 0.003*"of_ani" + 0.003*"the_compani" + 0.003*"in_the" + 0.003*"by_tenant"
2017-08-01 19:04:02,649 [MainThread  ] [INFO ]  topic #8 (0.100): 0.018*"of_the" + 0.008*"of_ani" + 0.006*"of_such" + 0.006*"to_the" + 0.005*"thi_agreement" + 0.005*"of_it" + 0.005*"shall_not" + 0.004*"ani_such" + 0.004*"ani_of" + 0.004*"is_not"
2017-08-01 19:04:02,651 [MainThread  ] [INFO ]  topic diff=0.127116, rho=0.249509
2017-08-01 19:04:02,652 [MainThread  ] [INFO ]  PROGRESS: pass 8, at document #10000/14126
2017-08-01 19:04:03,667 [MainThread  ] [INFO ]  merging changes

2017-08-01 19:04:09,958 [MainThread  ] [INFO ]  topic #1 (0.100): 0.013*"the_secur" + 0.012*"of_the" + 0.011*"portion_of" + 0.009*"j_clark" + 0.009*"david_j" + 0.008*"secur_and" + 0.008*"pursuant_to" + 0.008*"with_the" + 0.008*"confidenti_treatment" + 0.007*"to_a"
2017-08-01 19:04:09,960 [MainThread  ] [INFO ]  topic #6 (0.100): 0.012*"of_the" + 0.010*"to_the" + 0.007*"the_issuer" + 0.007*"thi_agreement" + 0.006*"enter_into" + 0.005*"the_parti" + 0.005*"with_respect" + 0.004*"aggreg_princip" + 0.004*"respect_to" + 0.004*"holder_of"
2017-08-01 19:04:09,962 [MainThread  ] [INFO ]  topic diff=0.157721, rho=0.242087
2017-08-01 19:04:09,964 [MainThread  ] [INFO ]  PROGRESS: pass 9, at document #4000/14126
2017-08-01 19:04:10,902 [MainThread  ] [INFO ]  merging changes from 2000 documents into a model of 14126 documents
2017-08-01 19:04:11,296 [MainThread  ] [INFO ]  topic #7 (0.100): 0.015*"of_the" + 0.011*"the_term" + 0.010*"thi_agreement" + 0.009*"in_the" + 0.009*"to_the" + 0.009*"set_for

2017-08-01 19:04:17,406 [MainThread  ] [INFO ]  topic #8 (0.100): 0.018*"of_the" + 0.008*"of_ani" + 0.006*"of_such" + 0.006*"to_the" + 0.005*"shall_not" + 0.005*"thi_agreement" + 0.005*"of_it" + 0.004*"the_other" + 0.004*"ani_of" + 0.004*"ani_such"
2017-08-01 19:04:17,408 [MainThread  ] [INFO ]  topic diff=0.112850, rho=0.242087
2017-08-01 19:04:17,410 [MainThread  ] [INFO ]  PROGRESS: pass 9, at document #14000/14126
2017-08-01 19:04:18,451 [MainThread  ] [INFO ]  merging changes from 2000 documents into a model of 14126 documents
2017-08-01 19:04:18,997 [MainThread  ] [INFO ]  topic #2 (0.100): 0.006*"of_the" + 0.005*"shall_be" + 0.005*"thi_agreement" + 0.004*"to_the" + 0.004*"and_support" + 0.004*"of_thi" + 0.004*"the_dental" + 0.004*"exhibit_a" + 0.004*"the_parti" + 0.004*"dental_practic"
2017-08-01 19:04:18,998 [MainThread  ] [INFO ]  topic #4 (0.100): 0.014*"of_the" + 0.010*"thi_leas" + 0.009*"the_premis" + 0.007*"tenant_shall" + 0.006*"the_leas" + 0.006*"to_the" + 0.006*"shall_b

## Output top topics

In [19]:
for topic in ldamodel.print_topics(num_topics=25, num_words=5):
    print(topic)

2017-08-01 19:04:20,176 [MainThread  ] [INFO ]  topic #0 (0.100): 0.013*"thi_agreement" + 0.009*"of_thi" + 0.008*"of_the" + 0.005*"provis_of" + 0.005*"payment_of"
2017-08-01 19:04:20,178 [MainThread  ] [INFO ]  topic #1 (0.100): 0.013*"j_clark" + 0.013*"david_j" + 0.010*"expens_in" + 0.008*"athyrium_opportun" + 0.007*"s_david"
2017-08-01 19:04:20,179 [MainThread  ] [INFO ]  topic #2 (0.100): 0.007*"of_permit" + 0.006*"of_the" + 0.006*"permit_debt" + 0.005*"and_each" + 0.005*"shall_be"
2017-08-01 19:04:20,181 [MainThread  ] [INFO ]  topic #3 (0.100): 0.010*"by_s" + 0.008*"of_the" + 0.006*"shall_be" + 0.005*"to_the" + 0.005*"definit_of"
2017-08-01 19:04:20,182 [MainThread  ] [INFO ]  topic #4 (0.100): 0.014*"of_the" + 0.009*"thi_leas" + 0.008*"the_premis" + 0.007*"gener_partner" + 0.006*"tenant_shall"
2017-08-01 19:04:20,184 [MainThread  ] [INFO ]  topic #5 (0.100): 0.011*"of_the" + 0.006*"for_the" + 0.005*"the_softwar" + 0.005*"the_form" + 0.004*"to_the"
2017-08-01 19:04:20,186 [MainThr

(0, '0.013*"thi_agreement" + 0.009*"of_thi" + 0.008*"of_the" + 0.005*"provis_of" + 0.005*"payment_of"')
(1, '0.013*"j_clark" + 0.013*"david_j" + 0.010*"expens_in" + 0.008*"athyrium_opportun" + 0.007*"s_david"')
(2, '0.007*"of_permit" + 0.006*"of_the" + 0.006*"permit_debt" + 0.005*"and_each" + 0.005*"shall_be"')
(3, '0.010*"by_s" + 0.008*"of_the" + 0.006*"shall_be" + 0.005*"to_the" + 0.005*"definit_of"')
(4, '0.014*"of_the" + 0.009*"thi_leas" + 0.008*"the_premis" + 0.007*"gener_partner" + 0.006*"tenant_shall"')
(5, '0.011*"of_the" + 0.006*"for_the" + 0.005*"the_softwar" + 0.005*"the_form" + 0.004*"to_the"')
(6, '0.013*"of_the" + 0.011*"to_the" + 0.008*"the_issuer" + 0.007*"enter_into" + 0.006*"thi_agreement"')
(7, '0.017*"of_the" + 0.011*"the_term" + 0.010*"ani_credit" + 0.010*"set_forth" + 0.009*"to_the"')
(8, '0.023*"of_the" + 0.008*"of_such" + 0.007*"of_ani" + 0.007*"and_expens" + 0.007*"is_not"')
(9, '0.018*"credit_parti" + 0.018*"such_debt" + 0.010*"by_ani" + 0.008*"in_the" + 0.008