### LDA Probabilistic Modeling
- LDA + CountVectorize (3 topics)
- LDA + CountVectorize (4 topics)
- Don't use TF-IDF with LDA

In [19]:
import pandas as pd
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize, RegexpTokenizer
from nltk.util import ngrams
nltk.download('stopwords')
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
import re

from gensim import corpora, models, similarities, matutils
import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/jennihawk/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [20]:
#got tweets at 1:15 Pacific Time 10/14/22
tweets = pd.read_csv('/Users/jennihawk/Documents/Data Science/NLP_Unsupervised Learning/Project_NLP/TweetBatch3.csv')
tweets

Unnamed: 0,text,cleaned
0,@ReallyAmerican1 #Roevember and\n#ForThePeople...,roevember and forthepeople and votebluein2022...
1,RT @sandibachom: IS THIS THING ON???!!This is ...,rt is this thing on this is pathetic acting se...
2,RT @sandibachom: IS THIS THING ON???!!This is ...,rt is this thing on this is pathetic acting se...
3,RT @tleehumphrey: Today is the beginning of th...,rt today is the beginning of the inquiry into ...
4,RT @AdamKinzinger: Mitch McConnell.\nKevin McC...,rt mitch mcconnell kevin mccarthy they both kn...
...,...,...
34988,RT @Adrian_Fontes: The January 6th committee j...,rt fontes the january 6th committee just concl...
34989,#January6thCommitteeHearings and everyone runn...,january6thcommitteehearings and everyone runn...
34990,RT @sandibachom: IS THIS THING ON???!!This is ...,rt is this thing on this is pathetic acting se...
34991,So they are gonna subpoena Trump I am guessing...,so they are gonna subpoena trump am guessing t...


### Remove stop words from cleaned column

In [21]:
stop_words = stopwords.words('english')
tweets['cleaned'] = tweets['cleaned'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop_words)]))

In [22]:
#tweets.head()

### Tokenize Words in Cleaned Column

In [23]:
tweets['tokenized'] = tweets.apply(lambda row: nltk.word_tokenize(row['cleaned']), axis=1)

In [24]:
tweets.head()

Unnamed: 0,text,cleaned,tokenized
0,@ReallyAmerican1 #Roevember and\n#ForThePeople...,roevember forthepeople votebluein2022 standwit...,"[roevember, forthepeople, votebluein2022, stan..."
1,RT @sandibachom: IS THIS THING ON???!!This is ...,rt thing pathetic acting sec defense chris mil...,"[rt, thing, pathetic, acting, sec, defense, ch..."
2,RT @sandibachom: IS THIS THING ON???!!This is ...,rt thing pathetic acting sec defense chris mil...,"[rt, thing, pathetic, acting, sec, defense, ch..."
3,RT @tleehumphrey: Today is the beginning of th...,rt today beginning inquiry trudeau gov use eme...,"[rt, today, beginning, inquiry, trudeau, gov, ..."
4,RT @AdamKinzinger: Mitch McConnell.\nKevin McC...,rt mitch mcconnell kevin mccarthy knew trump r...,"[rt, mitch, mcconnell, kevin, mccarthy, knew, ..."


## Countvectorizer + LDA 

### Countvectorizer
- a method to convert text to numerical data
- By default, Countvectorizer converts the text to lowercase and uses word-level tokenization.
- if using 'tokenize' column  for loops or list comprehension will help handle lists of lists. CountVectorizer takes list. 
- Countervectorize will do word tokenization not other types of tokenization

In [25]:
corpus = list(tweets.cleaned)

In [26]:
#turn corpus into a document-term matrix
vectorizer = CountVectorizer()
doc_term = vectorizer.fit_transform(corpus)
doc_term_df = pd.DataFrame(doc_term.toarray(), columns=vectorizer.get_feature_names())



In [27]:
doc_term_df.shape

(34993, 9404)

In [28]:
#doc_term_df.iloc[:, : 100]

### LDA: Reduce Dimensionality 
- Probabilistic Modeling using gensim
- Increase the number of passes to get more stable results.

#### Fit LDA Model - 4 Topics

In [31]:
#Fit an LDA model using `LdaModel` with num_topics=tbd topics. Set the `passes` hyperparameter to 10 so that the corpus will be scanned 10 times. Save the fitted model as `lda`.
#NOTE: This may take a few minutes to run. Take a look at the log while you're waiting.
term_doc = doc_term.transpose()
corpus = matutils.Sparse2Corpus(term_doc)
id2word = dict((v, k) for k, v in vectorizer.vocabulary_.items())
lda = models.LdaModel(corpus=corpus, num_topics=4, id2word=id2word, passes=10)
### END SOLUTION
lda

2022-10-20 14:58:55,248 : INFO : using symmetric alpha at 0.25
2022-10-20 14:58:55,250 : INFO : using symmetric eta at 0.25
2022-10-20 14:58:55,252 : INFO : using serial LDA version on this node
2022-10-20 14:58:55,257 : INFO : running online (multi-pass) LDA training, 4 topics, 10 passes over the supplied corpus of 34993 documents, updating model once every 2000 documents, evaluating perplexity every 20000 documents, iterating 50x with a convergence threshold of 0.001000
2022-10-20 14:58:55,266 : INFO : PROGRESS: pass 0, at document #2000/34993
2022-10-20 14:58:56,370 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 14:58:56,374 : INFO : topic #0 (0.250): 0.078*"rt" + 0.032*"trump" + 0.031*"knew" + 0.025*"january6thcommitteehearings" + 0.023*"mark" + 0.020*"democracy" + 0.019*"mccarthy" + 0.019*"mcconnell" + 0.019*"responsible" + 0.018*"called"
2022-10-20 14:58:56,375 : INFO : topic #1 (0.250): 0.097*"trump" + 0.059*"rt" + 0.024*"january6thcommit

2022-10-20 14:58:58,765 : INFO : topic #3 (0.250): 0.073*"rt" + 0.047*"january6thcommitteehearings" + 0.023*"trump" + 0.018*"people" + 0.016*"democracy" + 0.014*"chris" + 0.013*"miller" + 0.013*"accept" + 0.013*"american" + 0.013*"thing"
2022-10-20 14:58:58,766 : INFO : topic diff=0.364692, rho=0.377964
2022-10-20 14:58:58,775 : INFO : PROGRESS: pass 0, at document #16000/34993
2022-10-20 14:58:59,099 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 14:58:59,103 : INFO : topic #0 (0.250): 0.085*"rt" + 0.070*"knew" + 0.064*"trump" + 0.049*"called" + 0.048*"mcconnell" + 0.048*"responsible" + 0.048*"mccarthy" + 0.048*"kevin" + 0.048*"mitch" + 0.048*"backed"
2022-10-20 14:58:59,104 : INFO : topic #1 (0.250): 0.113*"trump" + 0.069*"rt" + 0.024*"donald" + 0.020*"january6thcommitteehearings" + 0.018*"january" + 0.016*"election" + 0.016*"oath" + 0.015*"6th" + 0.014*"voted" + 0.014*"subpoena"
2022-10-20 14:58:59,105 : INFO : topic #2 (0.250): 0.090*"rt" + 

2022-10-20 14:59:01,764 : INFO : topic #3 (0.250): 0.074*"rt" + 0.050*"january6thcommitteehearings" + 0.020*"trump" + 0.014*"thing" + 0.014*"people" + 0.014*"national" + 0.014*"chris" + 0.013*"person" + 0.013*"miller" + 0.013*"acting"
2022-10-20 14:59:01,764 : INFO : topic diff=0.298991, rho=0.267261
2022-10-20 14:59:01,775 : INFO : PROGRESS: pass 0, at document #30000/34993
2022-10-20 14:59:02,107 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 14:59:02,111 : INFO : topic #0 (0.250): 0.082*"rt" + 0.074*"knew" + 0.068*"trump" + 0.044*"responsible" + 0.044*"backed" + 0.043*"mcconnell" + 0.043*"called" + 0.043*"mitch" + 0.043*"mccarthy" + 0.043*"kevin"
2022-10-20 14:59:02,112 : INFO : topic #1 (0.250): 0.112*"trump" + 0.071*"rt" + 0.027*"donald" + 0.022*"oath" + 0.018*"subpoena" + 0.016*"voted" + 0.016*"january" + 0.016*"democracy" + 0.015*"testify" + 0.014*"january6thcommitteehearings"
2022-10-20 14:59:02,113 : INFO : topic #2 (0.250): 0.071*"rt" 

2022-10-20 14:59:04,255 : INFO : topic #3 (0.250): 0.067*"rt" + 0.062*"january6thcommitteehearings" + 0.018*"trump" + 0.015*"people" + 0.013*"thing" + 0.013*"chris" + 0.013*"democracy" + 0.012*"national" + 0.012*"miller" + 0.011*"person"
2022-10-20 14:59:04,256 : INFO : topic diff=0.283719, rho=0.226476
2022-10-20 14:59:04,266 : INFO : PROGRESS: pass 1, at document #8000/34993
2022-10-20 14:59:04,576 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 14:59:04,580 : INFO : topic #0 (0.250): 0.084*"rt" + 0.070*"knew" + 0.060*"trump" + 0.046*"responsible" + 0.046*"called" + 0.046*"mccarthy" + 0.046*"mcconnell" + 0.046*"kevin" + 0.046*"mitch" + 0.046*"backed"
2022-10-20 14:59:04,581 : INFO : topic #1 (0.250): 0.107*"trump" + 0.070*"rt" + 0.024*"donald" + 0.019*"january" + 0.018*"oath" + 0.018*"january6thcommitteehearings" + 0.017*"subpoena" + 0.016*"6th" + 0.015*"voted" + 0.014*"democracy"
2022-10-20 14:59:04,582 : INFO : topic #2 (0.250): 0.078*"rt" + 

2022-10-20 14:59:07,060 : INFO : topic #3 (0.250): 0.071*"rt" + 0.056*"january6thcommitteehearings" + 0.023*"trump" + 0.016*"people" + 0.014*"democracy" + 0.012*"american" + 0.012*"tried" + 0.012*"national" + 0.011*"accept" + 0.011*"chris"
2022-10-20 14:59:07,061 : INFO : topic diff=0.239656, rho=0.226476
2022-10-20 14:59:07,072 : INFO : PROGRESS: pass 1, at document #22000/34993
2022-10-20 14:59:07,361 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 14:59:07,364 : INFO : topic #0 (0.250): 0.084*"rt" + 0.075*"knew" + 0.068*"trump" + 0.049*"called" + 0.048*"responsible" + 0.048*"mcconnell" + 0.048*"mccarthy" + 0.048*"mitch" + 0.048*"kevin" + 0.048*"backed"
2022-10-20 14:59:07,365 : INFO : topic #1 (0.250): 0.112*"trump" + 0.070*"rt" + 0.028*"donald" + 0.019*"oath" + 0.018*"subpoena" + 0.018*"january" + 0.017*"voted" + 0.015*"testify" + 0.015*"democracy" + 0.015*"election"
2022-10-20 14:59:07,366 : INFO : topic #2 (0.250): 0.078*"rt" + 0.047*"janua

2022-10-20 14:59:09,230 : INFO : topic diff=0.169712, rho=0.226476
2022-10-20 14:59:09,528 : INFO : -5.597 per-word bound, 48.4 perplexity estimate based on a held-out corpus of 993 documents with 11228 words
2022-10-20 14:59:09,528 : INFO : PROGRESS: pass 1, at document #34993/34993
2022-10-20 14:59:09,678 : INFO : merging changes from 993 documents into a model of 34993 documents
2022-10-20 14:59:09,681 : INFO : topic #0 (0.250): 0.084*"rt" + 0.084*"knew" + 0.071*"trump" + 0.049*"responsible" + 0.049*"mcconnell" + 0.049*"called" + 0.049*"mitch" + 0.048*"backed" + 0.048*"mccarthy" + 0.048*"kevin"
2022-10-20 14:59:09,682 : INFO : topic #1 (0.250): 0.112*"trump" + 0.073*"rt" + 0.027*"donald" + 0.024*"oath" + 0.020*"subpoena" + 0.019*"voted" + 0.018*"democracy" + 0.017*"january" + 0.016*"testify" + 0.016*"history"
2022-10-20 14:59:09,684 : INFO : topic #2 (0.250): 0.074*"rt" + 0.039*"january6thcommitteehearings" + 0.029*"pelosi" + 0.017*"trump" + 0.013*"armed" + 0.013*"white" + 0.011*"sa

2022-10-20 14:59:11,764 : INFO : topic diff=0.188837, rho=0.220882
2022-10-20 14:59:11,775 : INFO : PROGRESS: pass 2, at document #14000/34993
2022-10-20 14:59:12,077 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 14:59:12,082 : INFO : topic #0 (0.250): 0.084*"rt" + 0.070*"knew" + 0.063*"trump" + 0.047*"called" + 0.047*"responsible" + 0.047*"mcconnell" + 0.047*"mccarthy" + 0.047*"kevin" + 0.046*"mitch" + 0.046*"backed"
2022-10-20 14:59:12,083 : INFO : topic #1 (0.250): 0.113*"trump" + 0.072*"rt" + 0.025*"donald" + 0.019*"january" + 0.018*"oath" + 0.016*"subpoena" + 0.016*"6th" + 0.015*"voted" + 0.015*"january6thcommitteehearings" + 0.015*"democracy"
2022-10-20 14:59:12,084 : INFO : topic #2 (0.250): 0.083*"rt" + 0.038*"january6thcommitteehearings" + 0.032*"pelosi" + 0.016*"chair" + 0.016*"amp" + 0.016*"trump" + 0.014*"trumpcoupattempt" + 0.013*"says" + 0.013*"law" + 0.013*"want"
2022-10-20 14:59:12,085 : INFO : topic #3 (0.250): 0.069*"rt" + 0.0

2022-10-20 14:59:14,559 : INFO : topic diff=0.190477, rho=0.220882
2022-10-20 14:59:14,571 : INFO : PROGRESS: pass 2, at document #28000/34993
2022-10-20 14:59:14,882 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 14:59:14,886 : INFO : topic #0 (0.250): 0.083*"rt" + 0.079*"knew" + 0.069*"trump" + 0.045*"called" + 0.045*"responsible" + 0.045*"mcconnell" + 0.045*"mccarthy" + 0.045*"mitch" + 0.045*"kevin" + 0.045*"backed"
2022-10-20 14:59:14,887 : INFO : topic #1 (0.250): 0.111*"trump" + 0.072*"rt" + 0.028*"donald" + 0.022*"oath" + 0.018*"subpoena" + 0.017*"voted" + 0.017*"january" + 0.016*"democracy" + 0.015*"testify" + 0.014*"election"
2022-10-20 14:59:14,888 : INFO : topic #2 (0.250): 0.069*"rt" + 0.040*"january6thcommitteehearings" + 0.028*"pelosi" + 0.015*"trump" + 0.013*"armed" + 0.013*"says" + 0.013*"white" + 0.012*"law" + 0.012*"violent" + 0.011*"failed"
2022-10-20 14:59:14,889 : INFO : topic #3 (0.250): 0.070*"rt" + 0.054*"january6thcommit

2022-10-20 14:59:16,966 : INFO : topic diff=0.210515, rho=0.215683
2022-10-20 14:59:16,976 : INFO : PROGRESS: pass 3, at document #6000/34993
2022-10-20 14:59:17,282 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 14:59:17,286 : INFO : topic #0 (0.250): 0.084*"rt" + 0.073*"knew" + 0.063*"trump" + 0.048*"called" + 0.048*"responsible" + 0.047*"mcconnell" + 0.047*"mitch" + 0.047*"mccarthy" + 0.047*"backed" + 0.047*"kevin"
2022-10-20 14:59:17,287 : INFO : topic #1 (0.250): 0.109*"trump" + 0.072*"rt" + 0.024*"donald" + 0.019*"oath" + 0.019*"january" + 0.018*"subpoena" + 0.016*"6th" + 0.016*"voted" + 0.015*"january6thcommitteehearings" + 0.015*"democracy"
2022-10-20 14:59:17,288 : INFO : topic #2 (0.250): 0.076*"rt" + 0.035*"january6thcommitteehearings" + 0.033*"pelosi" + 0.018*"chair" + 0.017*"nancy" + 0.016*"trump" + 0.015*"amp" + 0.014*"january" + 0.013*"january6th" + 0.012*"want"
2022-10-20 14:59:17,289 : INFO : topic #3 (0.250): 0.066*"rt" + 0.062

2022-10-20 14:59:19,675 : INFO : -5.778 per-word bound, 54.9 perplexity estimate based on a held-out corpus of 2000 documents with 23438 words
2022-10-20 14:59:19,676 : INFO : PROGRESS: pass 3, at document #20000/34993
2022-10-20 14:59:19,967 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 14:59:19,970 : INFO : topic #0 (0.250): 0.084*"rt" + 0.077*"knew" + 0.067*"trump" + 0.050*"called" + 0.049*"responsible" + 0.049*"mcconnell" + 0.049*"mccarthy" + 0.049*"mitch" + 0.049*"kevin" + 0.049*"backed"
2022-10-20 14:59:19,971 : INFO : topic #1 (0.250): 0.111*"trump" + 0.071*"rt" + 0.028*"donald" + 0.019*"oath" + 0.018*"subpoena" + 0.018*"january" + 0.018*"election" + 0.017*"voted" + 0.016*"democracy" + 0.014*"testify"
2022-10-20 14:59:19,972 : INFO : topic #2 (0.250): 0.080*"rt" + 0.046*"january6thcommitteehearings" + 0.033*"pelosi" + 0.016*"trump" + 0.015*"trumpcoupattempt" + 0.014*"amp" + 0.013*"nancy" + 0.013*"says" + 0.011*"law" + 0.010*"want"
2022-1

2022-10-20 14:59:21,835 : INFO : PROGRESS: pass 3, at document #34000/34993
2022-10-20 14:59:22,125 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 14:59:22,129 : INFO : topic #0 (0.250): 0.086*"knew" + 0.084*"rt" + 0.071*"trump" + 0.048*"responsible" + 0.048*"mcconnell" + 0.048*"called" + 0.048*"mitch" + 0.048*"backed" + 0.047*"mccarthy" + 0.047*"kevin"
2022-10-20 14:59:22,130 : INFO : topic #1 (0.250): 0.112*"trump" + 0.074*"rt" + 0.028*"donald" + 0.024*"oath" + 0.019*"subpoena" + 0.018*"voted" + 0.017*"january" + 0.017*"democracy" + 0.015*"testify" + 0.015*"history"
2022-10-20 14:59:22,131 : INFO : topic #2 (0.250): 0.073*"rt" + 0.038*"january6thcommitteehearings" + 0.029*"pelosi" + 0.017*"trump" + 0.013*"armed" + 0.013*"white" + 0.012*"violent" + 0.012*"capitol" + 0.012*"law" + 0.011*"says"
2022-10-20 14:59:22,132 : INFO : topic #3 (0.250): 0.074*"rt" + 0.056*"january6thcommitteehearings" + 0.018*"trump" + 0.015*"thing" + 0.015*"chris" + 0.01

2022-10-20 14:59:24,286 : INFO : PROGRESS: pass 4, at document #12000/34993
2022-10-20 14:59:24,571 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 14:59:24,575 : INFO : topic #0 (0.250): 0.084*"rt" + 0.068*"knew" + 0.062*"trump" + 0.047*"called" + 0.047*"responsible" + 0.047*"mcconnell" + 0.047*"mccarthy" + 0.047*"kevin" + 0.047*"mitch" + 0.047*"backed"
2022-10-20 14:59:24,576 : INFO : topic #1 (0.250): 0.112*"trump" + 0.072*"rt" + 0.025*"donald" + 0.019*"january" + 0.018*"oath" + 0.017*"subpoena" + 0.017*"6th" + 0.016*"voted" + 0.015*"violence" + 0.015*"attack"
2022-10-20 14:59:24,577 : INFO : topic #2 (0.250): 0.082*"rt" + 0.037*"january6thcommitteehearings" + 0.032*"pelosi" + 0.020*"chair" + 0.017*"amp" + 0.016*"trump" + 0.015*"want" + 0.014*"nancy" + 0.014*"trumpcoupattempt" + 0.013*"law"
2022-10-20 14:59:24,578 : INFO : topic #3 (0.250): 0.067*"rt" + 0.056*"january6thcommitteehearings" + 0.021*"trump" + 0.016*"people" + 0.013*"democracy" + 

2022-10-20 14:59:26,985 : INFO : PROGRESS: pass 4, at document #26000/34993
2022-10-20 14:59:27,264 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 14:59:27,268 : INFO : topic #0 (0.250): 0.083*"rt" + 0.078*"knew" + 0.069*"trump" + 0.047*"called" + 0.046*"mcconnell" + 0.046*"responsible" + 0.046*"mccarthy" + 0.046*"mitch" + 0.046*"kevin" + 0.046*"backed"
2022-10-20 14:59:27,269 : INFO : topic #1 (0.250): 0.114*"trump" + 0.073*"rt" + 0.029*"donald" + 0.021*"oath" + 0.019*"subpoena" + 0.018*"january" + 0.018*"voted" + 0.016*"democracy" + 0.016*"testify" + 0.015*"election"
2022-10-20 14:59:27,270 : INFO : topic #2 (0.250): 0.071*"rt" + 0.044*"january6thcommitteehearings" + 0.030*"pelosi" + 0.017*"trump" + 0.016*"says" + 0.013*"armed" + 0.012*"trumpcoupattempt" + 0.012*"law" + 0.012*"violent" + 0.011*"white"
2022-10-20 14:59:27,271 : INFO : topic #3 (0.250): 0.069*"rt" + 0.053*"january6thcommitteehearings" + 0.022*"trump" + 0.014*"people" + 0.013*"de

2022-10-20 14:59:29,289 : INFO : PROGRESS: pass 5, at document #4000/34993
2022-10-20 14:59:29,592 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 14:59:29,595 : INFO : topic #0 (0.250): 0.084*"rt" + 0.076*"knew" + 0.066*"trump" + 0.048*"responsible" + 0.048*"called" + 0.048*"mcconnell" + 0.048*"mitch" + 0.048*"backed" + 0.048*"mccarthy" + 0.048*"kevin"
2022-10-20 14:59:29,597 : INFO : topic #1 (0.250): 0.112*"trump" + 0.073*"rt" + 0.026*"donald" + 0.022*"oath" + 0.020*"subpoena" + 0.020*"january" + 0.017*"voted" + 0.016*"democracy" + 0.016*"6th" + 0.015*"attack"
2022-10-20 14:59:29,598 : INFO : topic #2 (0.250): 0.076*"rt" + 0.036*"january6thcommitteehearings" + 0.034*"pelosi" + 0.018*"nancy" + 0.017*"trump" + 0.014*"january" + 0.013*"knew" + 0.012*"amp" + 0.011*"chair" + 0.010*"law"
2022-10-20 14:59:29,599 : INFO : topic #3 (0.250): 0.069*"rt" + 0.061*"january6thcommitteehearings" + 0.018*"trump" + 0.013*"people" + 0.012*"chris" + 0.012*"thing"

2022-10-20 14:59:31,723 : INFO : topic #0 (0.250): 0.085*"rt" + 0.076*"knew" + 0.066*"trump" + 0.050*"called" + 0.049*"responsible" + 0.049*"mcconnell" + 0.049*"mccarthy" + 0.049*"mitch" + 0.049*"kevin" + 0.049*"backed"
2022-10-20 14:59:31,725 : INFO : topic #1 (0.250): 0.113*"trump" + 0.072*"rt" + 0.028*"donald" + 0.019*"oath" + 0.018*"january" + 0.018*"election" + 0.017*"subpoena" + 0.017*"voted" + 0.015*"democracy" + 0.015*"6th"
2022-10-20 14:59:31,725 : INFO : topic #2 (0.250): 0.083*"rt" + 0.045*"january6thcommitteehearings" + 0.033*"pelosi" + 0.016*"trump" + 0.015*"trumpcoupattempt" + 0.015*"amp" + 0.014*"nancy" + 0.014*"says" + 0.012*"law" + 0.011*"chair"
2022-10-20 14:59:31,726 : INFO : topic #3 (0.250): 0.070*"rt" + 0.057*"january6thcommitteehearings" + 0.023*"trump" + 0.015*"people" + 0.013*"democracy" + 0.011*"american" + 0.011*"tried" + 0.011*"national" + 0.011*"chris" + 0.011*"thing"
2022-10-20 14:59:31,727 : INFO : topic diff=0.181779, rho=0.206300
2022-10-20 14:59:32,314

2022-10-20 14:59:34,426 : INFO : topic #0 (0.250): 0.083*"rt" + 0.081*"knew" + 0.071*"trump" + 0.047*"responsible" + 0.047*"called" + 0.046*"mcconnell" + 0.046*"backed" + 0.046*"mitch" + 0.046*"mccarthy" + 0.046*"kevin"
2022-10-20 14:59:34,427 : INFO : topic #1 (0.250): 0.115*"trump" + 0.073*"rt" + 0.027*"donald" + 0.023*"oath" + 0.018*"subpoena" + 0.017*"january" + 0.017*"voted" + 0.016*"democracy" + 0.015*"testify" + 0.014*"attack"
2022-10-20 14:59:34,428 : INFO : topic #2 (0.250): 0.072*"rt" + 0.039*"january6thcommitteehearings" + 0.027*"pelosi" + 0.016*"trump" + 0.014*"armed" + 0.013*"violent" + 0.013*"white" + 0.012*"law" + 0.012*"says" + 0.011*"trumpcoupattempt"
2022-10-20 14:59:34,429 : INFO : topic #3 (0.250): 0.071*"rt" + 0.056*"january6thcommitteehearings" + 0.019*"trump" + 0.014*"thing" + 0.014*"chris" + 0.013*"national" + 0.013*"miller" + 0.013*"person" + 0.013*"acting" + 0.013*"sec"
2022-10-20 14:59:34,430 : INFO : topic diff=0.151234, rho=0.206300
2022-10-20 14:59:34,440 

2022-10-20 14:59:36,851 : INFO : topic #1 (0.250): 0.114*"trump" + 0.072*"rt" + 0.025*"donald" + 0.020*"january" + 0.019*"oath" + 0.018*"subpoena" + 0.017*"6th" + 0.016*"voted" + 0.015*"violence" + 0.015*"attack"
2022-10-20 14:59:36,852 : INFO : topic #2 (0.250): 0.080*"rt" + 0.036*"january6thcommitteehearings" + 0.031*"pelosi" + 0.024*"chair" + 0.018*"amp" + 0.016*"trump" + 0.016*"want" + 0.015*"january6th" + 0.014*"nancy" + 0.014*"service"
2022-10-20 14:59:36,853 : INFO : topic #3 (0.250): 0.065*"rt" + 0.059*"january6thcommitteehearings" + 0.020*"trump" + 0.016*"people" + 0.013*"democracy" + 0.012*"chris" + 0.012*"thing" + 0.012*"national" + 0.012*"miller" + 0.011*"american"
2022-10-20 14:59:36,853 : INFO : topic diff=0.142927, rho=0.202045
2022-10-20 14:59:36,863 : INFO : PROGRESS: pass 6, at document #12000/34993
2022-10-20 14:59:37,147 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 14:59:37,151 : INFO : topic #0 (0.250): 0.084*"rt" + 0.069*

2022-10-20 14:59:39,545 : INFO : topic #1 (0.250): 0.113*"trump" + 0.073*"rt" + 0.029*"donald" + 0.020*"oath" + 0.018*"subpoena" + 0.017*"january" + 0.017*"voted" + 0.016*"election" + 0.015*"testify" + 0.015*"democracy"
2022-10-20 14:59:39,546 : INFO : topic #2 (0.250): 0.073*"rt" + 0.046*"january6thcommitteehearings" + 0.031*"pelosi" + 0.017*"trump" + 0.013*"trumpcoupattempt" + 0.012*"says" + 0.011*"amp" + 0.011*"nancy" + 0.011*"armed" + 0.011*"law"
2022-10-20 14:59:39,547 : INFO : topic #3 (0.250): 0.069*"rt" + 0.056*"january6thcommitteehearings" + 0.024*"trump" + 0.016*"people" + 0.014*"democracy" + 0.012*"american" + 0.012*"national" + 0.012*"tried" + 0.012*"thing" + 0.012*"chris"
2022-10-20 14:59:39,547 : INFO : topic diff=0.167068, rho=0.202045
2022-10-20 14:59:39,558 : INFO : PROGRESS: pass 6, at document #26000/34993
2022-10-20 14:59:39,835 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 14:59:39,839 : INFO : topic #0 (0.250): 0.083*"rt" 

2022-10-20 14:59:41,834 : INFO : topic #1 (0.250): 0.113*"trump" + 0.073*"rt" + 0.028*"donald" + 0.023*"oath" + 0.019*"subpoena" + 0.019*"january" + 0.018*"voted" + 0.017*"democracy" + 0.016*"testify" + 0.015*"history"
2022-10-20 14:59:41,835 : INFO : topic #2 (0.250): 0.074*"rt" + 0.037*"january6thcommitteehearings" + 0.032*"pelosi" + 0.018*"trump" + 0.015*"nancy" + 0.010*"law" + 0.010*"amp" + 0.010*"trumpcoupattempt" + 0.010*"says" + 0.010*"armed"
2022-10-20 14:59:41,836 : INFO : topic #3 (0.250): 0.071*"rt" + 0.061*"january6thcommitteehearings" + 0.018*"trump" + 0.013*"thing" + 0.012*"chris" + 0.012*"people" + 0.012*"national" + 0.011*"miller" + 0.011*"person" + 0.011*"acting"
2022-10-20 14:59:41,837 : INFO : topic diff=0.217530, rho=0.198043
2022-10-20 14:59:41,846 : INFO : PROGRESS: pass 7, at document #4000/34993
2022-10-20 14:59:42,147 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 14:59:42,150 : INFO : topic #0 (0.250): 0.084*"rt" + 0.07

2022-10-20 14:59:43,973 : INFO : topic #2 (0.250): 0.084*"rt" + 0.042*"january6thcommitteehearings" + 0.033*"pelosi" + 0.016*"trump" + 0.015*"amp" + 0.015*"trumpcoupattempt" + 0.014*"nancy" + 0.014*"chair" + 0.014*"says" + 0.012*"law"
2022-10-20 14:59:43,974 : INFO : topic #3 (0.250): 0.069*"rt" + 0.057*"january6thcommitteehearings" + 0.022*"trump" + 0.015*"people" + 0.013*"democracy" + 0.011*"chris" + 0.011*"national" + 0.011*"thing" + 0.011*"miller" + 0.011*"american"
2022-10-20 14:59:43,975 : INFO : topic diff=0.148506, rho=0.198043
2022-10-20 14:59:43,986 : INFO : PROGRESS: pass 7, at document #18000/34993
2022-10-20 14:59:44,278 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 14:59:44,282 : INFO : topic #0 (0.250): 0.085*"rt" + 0.076*"knew" + 0.066*"trump" + 0.050*"called" + 0.049*"responsible" + 0.049*"mcconnell" + 0.049*"mccarthy" + 0.049*"mitch" + 0.049*"kevin" + 0.049*"backed"
2022-10-20 14:59:44,283 : INFO : topic #1 (0.250): 0.113*"tru

2022-10-20 14:59:46,694 : INFO : topic #2 (0.250): 0.071*"rt" + 0.040*"january6thcommitteehearings" + 0.027*"pelosi" + 0.016*"trump" + 0.014*"armed" + 0.013*"white" + 0.013*"law" + 0.012*"says" + 0.012*"violent" + 0.011*"failed"
2022-10-20 14:59:46,695 : INFO : topic #3 (0.250): 0.070*"rt" + 0.055*"january6thcommitteehearings" + 0.020*"trump" + 0.013*"thing" + 0.013*"chris" + 0.013*"people" + 0.013*"national" + 0.012*"miller" + 0.012*"person" + 0.012*"acting"
2022-10-20 14:59:46,696 : INFO : topic diff=0.166712, rho=0.198043
2022-10-20 14:59:46,706 : INFO : PROGRESS: pass 7, at document #32000/34993
2022-10-20 14:59:46,992 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 14:59:46,995 : INFO : topic #0 (0.250): 0.083*"rt" + 0.081*"knew" + 0.071*"trump" + 0.047*"responsible" + 0.047*"called" + 0.047*"mcconnell" + 0.047*"backed" + 0.046*"mitch" + 0.046*"mccarthy" + 0.046*"kevin"
2022-10-20 14:59:46,997 : INFO : topic #1 (0.250): 0.115*"trump" + 0.073

2022-10-20 14:59:49,014 : INFO : topic #3 (0.250): 0.064*"rt" + 0.060*"january6thcommitteehearings" + 0.020*"trump" + 0.014*"people" + 0.012*"democracy" + 0.012*"thing" + 0.012*"chris" + 0.011*"national" + 0.011*"miller" + 0.011*"person"
2022-10-20 14:59:49,015 : INFO : topic diff=0.184530, rho=0.194270
2022-10-20 14:59:49,025 : INFO : PROGRESS: pass 8, at document #10000/34993
2022-10-20 14:59:49,302 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 14:59:49,305 : INFO : topic #0 (0.250): 0.084*"rt" + 0.070*"knew" + 0.062*"trump" + 0.048*"responsible" + 0.048*"called" + 0.047*"mcconnell" + 0.047*"mccarthy" + 0.047*"mitch" + 0.047*"kevin" + 0.047*"backed"
2022-10-20 14:59:49,307 : INFO : topic #1 (0.250): 0.115*"trump" + 0.072*"rt" + 0.025*"donald" + 0.020*"january" + 0.019*"oath" + 0.018*"subpoena" + 0.017*"6th" + 0.016*"voted" + 0.016*"violence" + 0.015*"attack"
2022-10-20 14:59:49,308 : INFO : topic #2 (0.250): 0.080*"rt" + 0.037*"january6thcomm

2022-10-20 14:59:51,823 : INFO : topic #3 (0.250): 0.068*"rt" + 0.056*"january6thcommitteehearings" + 0.025*"trump" + 0.016*"people" + 0.014*"democracy" + 0.012*"american" + 0.012*"tried" + 0.011*"national" + 0.011*"accept" + 0.011*"break"
2022-10-20 14:59:51,823 : INFO : topic diff=0.158126, rho=0.194270
2022-10-20 14:59:51,833 : INFO : PROGRESS: pass 8, at document #24000/34993
2022-10-20 14:59:52,117 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 14:59:52,121 : INFO : topic #0 (0.250): 0.084*"rt" + 0.078*"knew" + 0.069*"trump" + 0.048*"called" + 0.048*"mcconnell" + 0.048*"responsible" + 0.048*"mccarthy" + 0.048*"mitch" + 0.048*"kevin" + 0.048*"backed"
2022-10-20 14:59:52,122 : INFO : topic #1 (0.250): 0.113*"trump" + 0.073*"rt" + 0.029*"donald" + 0.020*"oath" + 0.018*"subpoena" + 0.018*"january" + 0.017*"voted" + 0.016*"election" + 0.015*"testify" + 0.015*"democracy"
2022-10-20 14:59:52,123 : INFO : topic #2 (0.250): 0.074*"rt" + 0.046*"janua

2022-10-20 14:59:54,087 : INFO : topic #3 (0.250): 0.074*"rt" + 0.057*"january6thcommitteehearings" + 0.018*"trump" + 0.014*"chris" + 0.014*"thing" + 0.013*"national" + 0.013*"miller" + 0.013*"person" + 0.013*"sec" + 0.013*"acting"
2022-10-20 14:59:54,088 : INFO : topic diff=0.130213, rho=0.194270
2022-10-20 14:59:54,098 : INFO : PROGRESS: pass 9, at document #2000/34993
2022-10-20 14:59:54,424 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 14:59:54,428 : INFO : topic #0 (0.250): 0.084*"rt" + 0.080*"knew" + 0.069*"trump" + 0.049*"responsible" + 0.049*"mcconnell" + 0.049*"called" + 0.048*"mitch" + 0.048*"backed" + 0.048*"mccarthy" + 0.048*"kevin"
2022-10-20 14:59:54,429 : INFO : topic #1 (0.250): 0.113*"trump" + 0.073*"rt" + 0.028*"donald" + 0.024*"oath" + 0.019*"subpoena" + 0.019*"january" + 0.018*"voted" + 0.017*"democracy" + 0.016*"testify" + 0.015*"history"
2022-10-20 14:59:54,430 : INFO : topic #2 (0.250): 0.074*"rt" + 0.037*"january6thcommi

2022-10-20 14:59:56,269 : INFO : topic diff=0.144207, rho=0.190705
2022-10-20 14:59:56,279 : INFO : PROGRESS: pass 9, at document #16000/34993
2022-10-20 14:59:56,555 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 14:59:56,558 : INFO : topic #0 (0.250): 0.085*"rt" + 0.074*"knew" + 0.065*"trump" + 0.050*"called" + 0.049*"responsible" + 0.049*"mcconnell" + 0.049*"mccarthy" + 0.049*"kevin" + 0.049*"mitch" + 0.049*"backed"
2022-10-20 14:59:56,559 : INFO : topic #1 (0.250): 0.115*"trump" + 0.072*"rt" + 0.027*"donald" + 0.019*"january" + 0.018*"oath" + 0.017*"election" + 0.017*"subpoena" + 0.016*"6th" + 0.016*"voted" + 0.015*"violence"
2022-10-20 14:59:56,560 : INFO : topic #2 (0.250): 0.084*"rt" + 0.042*"january6thcommitteehearings" + 0.033*"pelosi" + 0.016*"trump" + 0.015*"amp" + 0.015*"trumpcoupattempt" + 0.014*"nancy" + 0.014*"chair" + 0.014*"says" + 0.012*"law"
2022-10-20 14:59:56,561 : INFO : topic #3 (0.250): 0.068*"rt" + 0.057*"january6thcommi

2022-10-20 14:59:58,955 : INFO : topic diff=0.177643, rho=0.190705
2022-10-20 14:59:58,966 : INFO : PROGRESS: pass 9, at document #30000/34993
2022-10-20 14:59:59,255 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 14:59:59,259 : INFO : topic #0 (0.250): 0.083*"rt" + 0.077*"knew" + 0.069*"trump" + 0.046*"responsible" + 0.046*"called" + 0.046*"backed" + 0.046*"mcconnell" + 0.045*"mccarthy" + 0.045*"mitch" + 0.045*"kevin"
2022-10-20 14:59:59,260 : INFO : topic #1 (0.250): 0.114*"trump" + 0.073*"rt" + 0.028*"donald" + 0.022*"oath" + 0.019*"subpoena" + 0.017*"voted" + 0.017*"january" + 0.016*"democracy" + 0.015*"testify" + 0.014*"history"
2022-10-20 14:59:59,261 : INFO : topic #2 (0.250): 0.071*"rt" + 0.040*"january6thcommitteehearings" + 0.027*"pelosi" + 0.016*"trump" + 0.014*"armed" + 0.013*"white" + 0.013*"law" + 0.012*"says" + 0.012*"violent" + 0.011*"failed"
2022-10-20 14:59:59,262 : INFO : topic #3 (0.250): 0.070*"rt" + 0.055*"january6thcommitt

<gensim.models.ldamodel.LdaModel at 0x7fb02bf9f220>

### How to Optimize Topics:
- The results may look fuzzy though, so to clean them up, you have several options:
- Increase the number of passes to get more stable results.
- Change the number of topics
- Clean up the text more in the CountVectorizer step, such as adding to the stop word list, removing common words, etc.
- Spend a few minutes doing at least one of these things to make your model better before moving on.

In [32]:
lda.print_topics()

2022-10-20 15:00:00,331 : INFO : topic #0 (0.250): 0.084*"rt" + 0.084*"knew" + 0.071*"trump" + 0.049*"responsible" + 0.049*"mcconnell" + 0.049*"called" + 0.049*"mitch" + 0.049*"backed" + 0.049*"mccarthy" + 0.049*"kevin"
2022-10-20 15:00:00,333 : INFO : topic #1 (0.250): 0.113*"trump" + 0.074*"rt" + 0.028*"donald" + 0.025*"oath" + 0.020*"subpoena" + 0.019*"voted" + 0.018*"january" + 0.017*"democracy" + 0.016*"testify" + 0.016*"history"
2022-10-20 15:00:00,334 : INFO : topic #2 (0.250): 0.074*"rt" + 0.039*"january6thcommitteehearings" + 0.029*"pelosi" + 0.017*"trump" + 0.013*"armed" + 0.012*"white" + 0.012*"says" + 0.011*"law" + 0.011*"trumpcoupattempt" + 0.011*"violent"
2022-10-20 15:00:00,335 : INFO : topic #3 (0.250): 0.073*"rt" + 0.057*"january6thcommitteehearings" + 0.018*"trump" + 0.014*"chris" + 0.014*"thing" + 0.013*"national" + 0.013*"miller" + 0.013*"person" + 0.013*"sec" + 0.013*"acting"


[(0,
  '0.084*"rt" + 0.084*"knew" + 0.071*"trump" + 0.049*"responsible" + 0.049*"mcconnell" + 0.049*"called" + 0.049*"mitch" + 0.049*"backed" + 0.049*"mccarthy" + 0.049*"kevin"'),
 (1,
  '0.113*"trump" + 0.074*"rt" + 0.028*"donald" + 0.025*"oath" + 0.020*"subpoena" + 0.019*"voted" + 0.018*"january" + 0.017*"democracy" + 0.016*"testify" + 0.016*"history"'),
 (2,
  '0.074*"rt" + 0.039*"january6thcommitteehearings" + 0.029*"pelosi" + 0.017*"trump" + 0.013*"armed" + 0.012*"white" + 0.012*"says" + 0.011*"law" + 0.011*"trumpcoupattempt" + 0.011*"violent"'),
 (3,
  '0.073*"rt" + 0.057*"january6thcommitteehearings" + 0.018*"trump" + 0.014*"chris" + 0.014*"thing" + 0.013*"national" + 0.013*"miller" + 0.013*"person" + 0.013*"sec" + 0.013*"acting"')]

#### Fit LDA Model - 3 Topics

In [33]:
term_doc = doc_term.transpose()
corpus = matutils.Sparse2Corpus(term_doc)
id2word = dict((v, k) for k, v in vectorizer.vocabulary_.items())
lda2 = models.LdaModel(corpus=corpus, num_topics=3, id2word=id2word, passes=10)
### END SOLUTION
lda2

2022-10-20 15:15:10,110 : INFO : using symmetric alpha at 0.3333333333333333
2022-10-20 15:15:10,111 : INFO : using symmetric eta at 0.3333333333333333
2022-10-20 15:15:10,114 : INFO : using serial LDA version on this node
2022-10-20 15:15:10,118 : INFO : running online (multi-pass) LDA training, 3 topics, 10 passes over the supplied corpus of 34993 documents, updating model once every 2000 documents, evaluating perplexity every 20000 documents, iterating 50x with a convergence threshold of 0.001000
2022-10-20 15:15:10,126 : INFO : PROGRESS: pass 0, at document #2000/34993
2022-10-20 15:15:11,326 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 15:15:11,329 : INFO : topic #0 (0.333): 0.062*"rt" + 0.042*"trump" + 0.026*"democracy" + 0.024*"january6thcommitteehearings" + 0.023*"violence" + 0.020*"knew" + 0.020*"gop" + 0.020*"maga" + 0.020*"january6thcomm" + 0.019*"country"
2022-10-20 15:15:11,330 : INFO : topic #1 (0.333): 0.084*"rt" + 0.052*"trump"

2022-10-20 15:15:14,679 : INFO : topic #1 (0.333): 0.094*"rt" + 0.072*"trump" + 0.065*"knew" + 0.052*"called" + 0.051*"responsible" + 0.051*"mcconnell" + 0.051*"mccarthy" + 0.051*"mitch" + 0.051*"kevin" + 0.051*"backed"
2022-10-20 15:15:14,680 : INFO : topic #2 (0.333): 0.072*"rt" + 0.068*"trump" + 0.019*"donald" + 0.018*"january" + 0.016*"january6thcommitteehearings" + 0.016*"video" + 0.014*"lost" + 0.014*"roger" + 0.014*"stone" + 0.014*"help"
2022-10-20 15:15:14,680 : INFO : topic diff=0.355413, rho=0.333333
2022-10-20 15:15:15,468 : INFO : -6.025 per-word bound, 65.1 perplexity estimate based on a held-out corpus of 2000 documents with 23438 words
2022-10-20 15:15:15,469 : INFO : PROGRESS: pass 0, at document #20000/34993
2022-10-20 15:15:15,842 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 15:15:15,845 : INFO : topic #0 (0.333): 0.068*"rt" + 0.036*"trump" + 0.030*"january6thcommitteehearings" + 0.023*"democracy" + 0.016*"knew" + 0.015*"maga

2022-10-20 15:15:18,931 : INFO : merging changes from 993 documents into a model of 34993 documents
2022-10-20 15:15:18,934 : INFO : topic #0 (0.333): 0.068*"rt" + 0.040*"trump" + 0.030*"january6thcommitteehearings" + 0.022*"knew" + 0.013*"democracy" + 0.012*"pelosi" + 0.010*"get" + 0.009*"maga" + 0.009*"hours" + 0.009*"come"
2022-10-20 15:15:18,935 : INFO : topic #1 (0.333): 0.090*"rt" + 0.065*"trump" + 0.058*"knew" + 0.047*"responsible" + 0.046*"mcconnell" + 0.046*"called" + 0.046*"mitch" + 0.046*"backed" + 0.046*"mccarthy" + 0.046*"kevin"
2022-10-20 15:15:18,937 : INFO : topic #2 (0.333): 0.076*"rt" + 0.074*"trump" + 0.021*"donald" + 0.019*"january6thcommitteehearings" + 0.018*"january" + 0.017*"subpoena" + 0.016*"voted" + 0.014*"6th" + 0.014*"lost" + 0.014*"coup"
2022-10-20 15:15:18,938 : INFO : topic diff=0.203364, rho=0.235702
2022-10-20 15:15:18,948 : INFO : PROGRESS: pass 1, at document #2000/34993
2022-10-20 15:15:19,341 : INFO : merging changes from 2000 documents into a mode

2022-10-20 15:15:21,757 : INFO : PROGRESS: pass 1, at document #18000/34993
2022-10-20 15:15:22,076 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 15:15:22,079 : INFO : topic #0 (0.333): 0.067*"rt" + 0.034*"trump" + 0.032*"january6thcommitteehearings" + 0.022*"democracy" + 0.016*"maga" + 0.014*"knew" + 0.014*"country" + 0.014*"gop" + 0.013*"violence" + 0.013*"without"
2022-10-20 15:15:22,080 : INFO : topic #1 (0.333): 0.095*"rt" + 0.072*"trump" + 0.070*"knew" + 0.054*"called" + 0.054*"responsible" + 0.054*"mcconnell" + 0.053*"mccarthy" + 0.053*"mitch" + 0.053*"kevin" + 0.053*"backed"
2022-10-20 15:15:22,082 : INFO : topic #2 (0.333): 0.074*"rt" + 0.069*"trump" + 0.020*"january" + 0.019*"donald" + 0.016*"video" + 0.016*"january6thcommitteehearings" + 0.014*"6th" + 0.014*"lost" + 0.014*"stone" + 0.014*"roger"
2022-10-20 15:15:22,082 : INFO : topic diff=0.235625, rho=0.226476
2022-10-20 15:15:22,719 : INFO : -5.940 per-word bound, 61.4 perplexity e

2022-10-20 15:15:25,472 : INFO : topic diff=0.182601, rho=0.226476
2022-10-20 15:15:25,778 : INFO : -5.725 per-word bound, 52.9 perplexity estimate based on a held-out corpus of 993 documents with 11228 words
2022-10-20 15:15:25,779 : INFO : PROGRESS: pass 1, at document #34993/34993
2022-10-20 15:15:25,943 : INFO : merging changes from 993 documents into a model of 34993 documents
2022-10-20 15:15:25,946 : INFO : topic #0 (0.333): 0.067*"rt" + 0.038*"trump" + 0.032*"january6thcommitteehearings" + 0.013*"democracy" + 0.013*"knew" + 0.012*"pelosi" + 0.010*"get" + 0.009*"maga" + 0.009*"hours" + 0.008*"come"
2022-10-20 15:15:25,947 : INFO : topic #1 (0.333): 0.090*"rt" + 0.070*"knew" + 0.066*"trump" + 0.048*"responsible" + 0.047*"mcconnell" + 0.047*"called" + 0.047*"mitch" + 0.047*"backed" + 0.047*"mccarthy" + 0.047*"kevin"
2022-10-20 15:15:25,948 : INFO : topic #2 (0.333): 0.077*"rt" + 0.075*"trump" + 0.021*"donald" + 0.018*"january" + 0.018*"january6thcommitteehearings" + 0.017*"voted" 

2022-10-20 15:15:28,687 : INFO : topic #2 (0.333): 0.075*"rt" + 0.071*"trump" + 0.022*"january" + 0.018*"donald" + 0.017*"video" + 0.016*"6th" + 0.014*"lost" + 0.014*"help" + 0.013*"stone" + 0.013*"coup"
2022-10-20 15:15:28,688 : INFO : topic diff=0.194204, rho=0.220882
2022-10-20 15:15:28,699 : INFO : PROGRESS: pass 2, at document #18000/34993
2022-10-20 15:15:29,031 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 15:15:29,035 : INFO : topic #0 (0.333): 0.067*"rt" + 0.035*"january6thcommitteehearings" + 0.033*"trump" + 0.021*"democracy" + 0.015*"maga" + 0.013*"country" + 0.013*"gop" + 0.013*"violence" + 0.012*"without" + 0.012*"love"
2022-10-20 15:15:29,036 : INFO : topic #1 (0.333): 0.095*"rt" + 0.080*"knew" + 0.074*"trump" + 0.056*"called" + 0.055*"responsible" + 0.055*"mcconnell" + 0.054*"mccarthy" + 0.054*"mitch" + 0.054*"kevin" + 0.054*"backed"
2022-10-20 15:15:29,037 : INFO : topic #2 (0.333): 0.075*"rt" + 0.070*"trump" + 0.021*"january" +

2022-10-20 15:15:32,556 : INFO : topic #2 (0.333): 0.077*"rt" + 0.074*"trump" + 0.021*"donald" + 0.018*"january" + 0.017*"voted" + 0.016*"january6thcommitteehearings" + 0.016*"subpoena" + 0.015*"help" + 0.015*"video" + 0.015*"lost"
2022-10-20 15:15:32,557 : INFO : topic diff=0.167189, rho=0.220882
2022-10-20 15:15:32,871 : INFO : -5.715 per-word bound, 52.5 perplexity estimate based on a held-out corpus of 993 documents with 11228 words
2022-10-20 15:15:32,872 : INFO : PROGRESS: pass 2, at document #34993/34993
2022-10-20 15:15:33,029 : INFO : merging changes from 993 documents into a model of 34993 documents
2022-10-20 15:15:33,032 : INFO : topic #0 (0.333): 0.067*"rt" + 0.037*"trump" + 0.033*"january6thcommitteehearings" + 0.013*"democracy" + 0.011*"pelosi" + 0.009*"get" + 0.009*"maga" + 0.008*"hours" + 0.008*"come" + 0.008*"gop"
2022-10-20 15:15:33,033 : INFO : topic #1 (0.333): 0.090*"rt" + 0.078*"knew" + 0.067*"trump" + 0.048*"responsible" + 0.048*"mcconnell" + 0.048*"called" + 0.

2022-10-20 15:15:35,738 : INFO : topic #1 (0.333): 0.096*"rt" + 0.084*"knew" + 0.074*"trump" + 0.056*"called" + 0.056*"responsible" + 0.056*"mcconnell" + 0.056*"mccarthy" + 0.055*"kevin" + 0.055*"mitch" + 0.055*"backed"
2022-10-20 15:15:35,739 : INFO : topic #2 (0.333): 0.075*"rt" + 0.072*"trump" + 0.022*"january" + 0.018*"donald" + 0.017*"video" + 0.016*"6th" + 0.014*"lost" + 0.014*"help" + 0.014*"stone" + 0.013*"voted"
2022-10-20 15:15:35,740 : INFO : topic diff=0.183082, rho=0.215683
2022-10-20 15:15:35,750 : INFO : PROGRESS: pass 3, at document #18000/34993
2022-10-20 15:15:36,059 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 15:15:36,062 : INFO : topic #0 (0.333): 0.066*"rt" + 0.036*"january6thcommitteehearings" + 0.032*"trump" + 0.020*"democracy" + 0.015*"maga" + 0.013*"country" + 0.013*"gop" + 0.012*"violence" + 0.012*"without" + 0.012*"love"
2022-10-20 15:15:36,063 : INFO : topic #1 (0.333): 0.096*"rt" + 0.086*"knew" + 0.075*"trump" + 0

2022-10-20 15:15:39,338 : INFO : topic #1 (0.333): 0.089*"rt" + 0.085*"knew" + 0.068*"trump" + 0.047*"responsible" + 0.047*"mcconnell" + 0.047*"called" + 0.047*"mitch" + 0.047*"backed" + 0.047*"mccarthy" + 0.047*"kevin"
2022-10-20 15:15:39,339 : INFO : topic #2 (0.333): 0.078*"rt" + 0.075*"trump" + 0.022*"donald" + 0.018*"january" + 0.017*"voted" + 0.016*"subpoena" + 0.016*"january6thcommitteehearings" + 0.016*"help" + 0.015*"lost" + 0.015*"video"
2022-10-20 15:15:39,340 : INFO : topic diff=0.158542, rho=0.215683
2022-10-20 15:15:39,666 : INFO : -5.710 per-word bound, 52.3 perplexity estimate based on a held-out corpus of 993 documents with 11228 words
2022-10-20 15:15:39,667 : INFO : PROGRESS: pass 3, at document #34993/34993
2022-10-20 15:15:39,826 : INFO : merging changes from 993 documents into a model of 34993 documents
2022-10-20 15:15:39,829 : INFO : topic #0 (0.333): 0.066*"rt" + 0.037*"trump" + 0.034*"january6thcommitteehearings" + 0.013*"democracy" + 0.011*"pelosi" + 0.009*"g

2022-10-20 15:15:42,531 : INFO : topic #1 (0.333): 0.096*"rt" + 0.087*"knew" + 0.075*"trump" + 0.057*"called" + 0.056*"responsible" + 0.056*"mcconnell" + 0.056*"mccarthy" + 0.056*"kevin" + 0.056*"mitch" + 0.056*"backed"
2022-10-20 15:15:42,532 : INFO : topic #2 (0.333): 0.076*"rt" + 0.073*"trump" + 0.022*"january" + 0.019*"donald" + 0.018*"video" + 0.016*"6th" + 0.015*"lost" + 0.014*"help" + 0.014*"stone" + 0.014*"voted"
2022-10-20 15:15:42,533 : INFO : topic diff=0.175526, rho=0.210835
2022-10-20 15:15:42,543 : INFO : PROGRESS: pass 4, at document #18000/34993
2022-10-20 15:15:42,877 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 15:15:42,880 : INFO : topic #0 (0.333): 0.066*"rt" + 0.037*"january6thcommitteehearings" + 0.032*"trump" + 0.020*"democracy" + 0.015*"maga" + 0.013*"country" + 0.013*"gop" + 0.012*"violence" + 0.012*"without" + 0.012*"love"
2022-10-20 15:15:42,881 : INFO : topic #1 (0.333): 0.096*"rt" + 0.089*"knew" + 0.076*"trump" + 0

2022-10-20 15:15:46,251 : INFO : topic #1 (0.333): 0.089*"rt" + 0.088*"knew" + 0.069*"trump" + 0.047*"responsible" + 0.047*"mcconnell" + 0.047*"called" + 0.047*"mitch" + 0.047*"backed" + 0.047*"mccarthy" + 0.047*"kevin"
2022-10-20 15:15:46,252 : INFO : topic #2 (0.333): 0.078*"rt" + 0.076*"trump" + 0.022*"donald" + 0.018*"january" + 0.017*"voted" + 0.017*"subpoena" + 0.016*"lost" + 0.016*"help" + 0.016*"january6thcommitteehearings" + 0.016*"video"
2022-10-20 15:15:46,252 : INFO : topic diff=0.152573, rho=0.210835
2022-10-20 15:15:46,549 : INFO : -5.707 per-word bound, 52.3 perplexity estimate based on a held-out corpus of 993 documents with 11228 words
2022-10-20 15:15:46,550 : INFO : PROGRESS: pass 4, at document #34993/34993
2022-10-20 15:15:46,700 : INFO : merging changes from 993 documents into a model of 34993 documents
2022-10-20 15:15:46,703 : INFO : topic #0 (0.333): 0.066*"rt" + 0.037*"trump" + 0.034*"january6thcommitteehearings" + 0.013*"democracy" + 0.011*"pelosi" + 0.009*"g

2022-10-20 15:15:49,279 : INFO : topic #1 (0.333): 0.096*"rt" + 0.089*"knew" + 0.075*"trump" + 0.057*"called" + 0.056*"responsible" + 0.056*"mcconnell" + 0.056*"mccarthy" + 0.056*"kevin" + 0.056*"mitch" + 0.056*"backed"
2022-10-20 15:15:49,280 : INFO : topic #2 (0.333): 0.076*"rt" + 0.073*"trump" + 0.023*"january" + 0.019*"donald" + 0.018*"video" + 0.016*"6th" + 0.015*"lost" + 0.014*"help" + 0.014*"voted" + 0.014*"stone"
2022-10-20 15:15:49,281 : INFO : topic diff=0.169643, rho=0.206300
2022-10-20 15:15:49,291 : INFO : PROGRESS: pass 5, at document #18000/34993
2022-10-20 15:15:49,586 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 15:15:49,589 : INFO : topic #0 (0.333): 0.066*"rt" + 0.037*"january6thcommitteehearings" + 0.031*"trump" + 0.020*"democracy" + 0.014*"maga" + 0.013*"country" + 0.013*"gop" + 0.012*"violence" + 0.012*"without" + 0.012*"love"
2022-10-20 15:15:49,591 : INFO : topic #1 (0.333): 0.096*"rt" + 0.091*"knew" + 0.077*"trump" + 0

2022-10-20 15:15:52,720 : INFO : topic #1 (0.333): 0.090*"knew" + 0.089*"rt" + 0.069*"trump" + 0.047*"responsible" + 0.047*"mcconnell" + 0.047*"called" + 0.047*"mitch" + 0.047*"backed" + 0.047*"mccarthy" + 0.047*"kevin"
2022-10-20 15:15:52,721 : INFO : topic #2 (0.333): 0.078*"rt" + 0.076*"trump" + 0.022*"donald" + 0.018*"january" + 0.017*"voted" + 0.017*"subpoena" + 0.017*"lost" + 0.016*"help" + 0.016*"video" + 0.015*"january6thcommitteehearings"
2022-10-20 15:15:52,722 : INFO : topic diff=0.147460, rho=0.206300
2022-10-20 15:15:53,027 : INFO : -5.705 per-word bound, 52.2 perplexity estimate based on a held-out corpus of 993 documents with 11228 words
2022-10-20 15:15:53,028 : INFO : PROGRESS: pass 5, at document #34993/34993
2022-10-20 15:15:53,180 : INFO : merging changes from 993 documents into a model of 34993 documents
2022-10-20 15:15:53,183 : INFO : topic #0 (0.333): 0.066*"rt" + 0.036*"trump" + 0.035*"january6thcommitteehearings" + 0.013*"democracy" + 0.011*"pelosi" + 0.009*"g

2022-10-20 15:15:55,716 : INFO : topic #1 (0.333): 0.096*"rt" + 0.090*"knew" + 0.076*"trump" + 0.057*"called" + 0.056*"responsible" + 0.056*"mcconnell" + 0.056*"mccarthy" + 0.056*"kevin" + 0.056*"mitch" + 0.056*"backed"
2022-10-20 15:15:55,717 : INFO : topic #2 (0.333): 0.076*"rt" + 0.074*"trump" + 0.023*"january" + 0.019*"donald" + 0.018*"video" + 0.016*"6th" + 0.015*"lost" + 0.015*"help" + 0.014*"voted" + 0.014*"stone"
2022-10-20 15:15:55,717 : INFO : topic diff=0.164746, rho=0.202045
2022-10-20 15:15:55,727 : INFO : PROGRESS: pass 6, at document #18000/34993
2022-10-20 15:15:56,028 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 15:15:56,031 : INFO : topic #0 (0.333): 0.066*"rt" + 0.037*"january6thcommitteehearings" + 0.031*"trump" + 0.020*"democracy" + 0.014*"maga" + 0.013*"country" + 0.013*"gop" + 0.012*"violence" + 0.012*"without" + 0.012*"love"
2022-10-20 15:15:56,032 : INFO : topic #1 (0.333): 0.096*"rt" + 0.092*"knew" + 0.077*"trump" + 0

2022-10-20 15:15:59,138 : INFO : topic #1 (0.333): 0.090*"knew" + 0.089*"rt" + 0.069*"trump" + 0.047*"responsible" + 0.047*"mcconnell" + 0.047*"called" + 0.047*"mitch" + 0.047*"backed" + 0.047*"mccarthy" + 0.047*"kevin"
2022-10-20 15:15:59,140 : INFO : topic #2 (0.333): 0.078*"rt" + 0.076*"trump" + 0.022*"donald" + 0.018*"january" + 0.017*"voted" + 0.017*"lost" + 0.017*"subpoena" + 0.016*"help" + 0.016*"video" + 0.015*"coup"
2022-10-20 15:15:59,141 : INFO : topic diff=0.143132, rho=0.202045
2022-10-20 15:15:59,558 : INFO : -5.702 per-word bound, 52.1 perplexity estimate based on a held-out corpus of 993 documents with 11228 words
2022-10-20 15:15:59,559 : INFO : PROGRESS: pass 6, at document #34993/34993
2022-10-20 15:15:59,708 : INFO : merging changes from 993 documents into a model of 34993 documents
2022-10-20 15:15:59,711 : INFO : topic #0 (0.333): 0.066*"rt" + 0.036*"trump" + 0.035*"january6thcommitteehearings" + 0.013*"democracy" + 0.011*"pelosi" + 0.009*"get" + 0.009*"maga" + 0.

2022-10-20 15:16:02,243 : INFO : topic #1 (0.333): 0.096*"rt" + 0.090*"knew" + 0.076*"trump" + 0.057*"called" + 0.056*"responsible" + 0.056*"mcconnell" + 0.056*"mccarthy" + 0.056*"kevin" + 0.056*"mitch" + 0.056*"backed"
2022-10-20 15:16:02,244 : INFO : topic #2 (0.333): 0.076*"rt" + 0.074*"trump" + 0.023*"january" + 0.019*"donald" + 0.018*"video" + 0.016*"6th" + 0.015*"lost" + 0.015*"help" + 0.014*"voted" + 0.014*"stone"
2022-10-20 15:16:02,245 : INFO : topic diff=0.160769, rho=0.198043
2022-10-20 15:16:02,255 : INFO : PROGRESS: pass 7, at document #18000/34993
2022-10-20 15:16:02,552 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 15:16:02,555 : INFO : topic #0 (0.333): 0.066*"rt" + 0.037*"january6thcommitteehearings" + 0.031*"trump" + 0.020*"democracy" + 0.014*"maga" + 0.013*"country" + 0.013*"gop" + 0.012*"violence" + 0.012*"without" + 0.012*"pelosi"
2022-10-20 15:16:02,556 : INFO : topic #1 (0.333): 0.096*"rt" + 0.092*"knew" + 0.077*"trump" +

2022-10-20 15:16:05,655 : INFO : topic #1 (0.333): 0.090*"knew" + 0.089*"rt" + 0.069*"trump" + 0.048*"responsible" + 0.047*"mcconnell" + 0.047*"called" + 0.047*"mitch" + 0.047*"backed" + 0.047*"mccarthy" + 0.047*"kevin"
2022-10-20 15:16:05,657 : INFO : topic #2 (0.333): 0.078*"rt" + 0.077*"trump" + 0.022*"donald" + 0.019*"january" + 0.017*"voted" + 0.017*"lost" + 0.017*"subpoena" + 0.016*"help" + 0.016*"video" + 0.015*"coup"
2022-10-20 15:16:05,657 : INFO : topic diff=0.139381, rho=0.198043
2022-10-20 15:16:05,951 : INFO : -5.700 per-word bound, 52.0 perplexity estimate based on a held-out corpus of 993 documents with 11228 words
2022-10-20 15:16:05,952 : INFO : PROGRESS: pass 7, at document #34993/34993
2022-10-20 15:16:06,100 : INFO : merging changes from 993 documents into a model of 34993 documents
2022-10-20 15:16:06,103 : INFO : topic #0 (0.333): 0.066*"rt" + 0.036*"trump" + 0.035*"january6thcommitteehearings" + 0.013*"democracy" + 0.011*"pelosi" + 0.009*"get" + 0.009*"maga" + 0.

2022-10-20 15:16:08,623 : INFO : topic #1 (0.333): 0.096*"rt" + 0.091*"knew" + 0.076*"trump" + 0.057*"called" + 0.056*"responsible" + 0.056*"mcconnell" + 0.056*"mccarthy" + 0.056*"kevin" + 0.056*"mitch" + 0.056*"backed"
2022-10-20 15:16:08,624 : INFO : topic #2 (0.333): 0.076*"rt" + 0.074*"trump" + 0.023*"january" + 0.020*"donald" + 0.018*"video" + 0.016*"6th" + 0.016*"lost" + 0.015*"help" + 0.014*"voted" + 0.014*"stone"
2022-10-20 15:16:08,624 : INFO : topic diff=0.157109, rho=0.194270
2022-10-20 15:16:08,634 : INFO : PROGRESS: pass 8, at document #18000/34993
2022-10-20 15:16:08,933 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 15:16:08,936 : INFO : topic #0 (0.333): 0.066*"rt" + 0.037*"january6thcommitteehearings" + 0.030*"trump" + 0.020*"democracy" + 0.014*"maga" + 0.013*"country" + 0.013*"gop" + 0.012*"violence" + 0.012*"pelosi" + 0.012*"without"
2022-10-20 15:16:08,938 : INFO : topic #1 (0.333): 0.096*"rt" + 0.092*"knew" + 0.077*"trump" +

2022-10-20 15:16:12,028 : INFO : topic #1 (0.333): 0.090*"knew" + 0.089*"rt" + 0.070*"trump" + 0.048*"responsible" + 0.048*"mcconnell" + 0.047*"called" + 0.047*"mitch" + 0.047*"backed" + 0.047*"mccarthy" + 0.047*"kevin"
2022-10-20 15:16:12,029 : INFO : topic #2 (0.333): 0.078*"rt" + 0.077*"trump" + 0.022*"donald" + 0.019*"january" + 0.017*"voted" + 0.017*"lost" + 0.017*"subpoena" + 0.016*"help" + 0.016*"video" + 0.015*"coup"
2022-10-20 15:16:12,030 : INFO : topic diff=0.135944, rho=0.194270
2022-10-20 15:16:12,328 : INFO : -5.698 per-word bound, 51.9 perplexity estimate based on a held-out corpus of 993 documents with 11228 words
2022-10-20 15:16:12,329 : INFO : PROGRESS: pass 8, at document #34993/34993
2022-10-20 15:16:12,480 : INFO : merging changes from 993 documents into a model of 34993 documents
2022-10-20 15:16:12,483 : INFO : topic #0 (0.333): 0.066*"rt" + 0.036*"trump" + 0.035*"january6thcommitteehearings" + 0.013*"democracy" + 0.011*"pelosi" + 0.009*"get" + 0.009*"maga" + 0.

2022-10-20 15:16:15,129 : INFO : topic #1 (0.333): 0.096*"rt" + 0.090*"knew" + 0.076*"trump" + 0.057*"called" + 0.056*"responsible" + 0.056*"mcconnell" + 0.056*"mccarthy" + 0.056*"kevin" + 0.056*"mitch" + 0.056*"backed"
2022-10-20 15:16:15,130 : INFO : topic #2 (0.333): 0.076*"rt" + 0.075*"trump" + 0.023*"january" + 0.020*"donald" + 0.018*"video" + 0.017*"6th" + 0.016*"lost" + 0.015*"help" + 0.014*"voted" + 0.014*"stone"
2022-10-20 15:16:15,131 : INFO : topic diff=0.153501, rho=0.190705
2022-10-20 15:16:15,142 : INFO : PROGRESS: pass 9, at document #18000/34993
2022-10-20 15:16:15,442 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-20 15:16:15,445 : INFO : topic #0 (0.333): 0.066*"rt" + 0.037*"january6thcommitteehearings" + 0.030*"trump" + 0.020*"democracy" + 0.014*"maga" + 0.013*"country" + 0.013*"gop" + 0.012*"violence" + 0.012*"pelosi" + 0.012*"without"
2022-10-20 15:16:15,446 : INFO : topic #1 (0.333): 0.095*"rt" + 0.092*"knew" + 0.077*"trump" +

2022-10-20 15:16:18,562 : INFO : topic #1 (0.333): 0.090*"knew" + 0.089*"rt" + 0.070*"trump" + 0.048*"responsible" + 0.048*"mcconnell" + 0.047*"called" + 0.047*"mitch" + 0.047*"backed" + 0.047*"mccarthy" + 0.047*"kevin"
2022-10-20 15:16:18,563 : INFO : topic #2 (0.333): 0.078*"rt" + 0.077*"trump" + 0.022*"donald" + 0.019*"january" + 0.017*"lost" + 0.017*"voted" + 0.017*"subpoena" + 0.016*"help" + 0.016*"video" + 0.016*"coup"
2022-10-20 15:16:18,564 : INFO : topic diff=0.132832, rho=0.190705
2022-10-20 15:16:18,859 : INFO : -5.696 per-word bound, 51.8 perplexity estimate based on a held-out corpus of 993 documents with 11228 words
2022-10-20 15:16:18,860 : INFO : PROGRESS: pass 9, at document #34993/34993
2022-10-20 15:16:19,010 : INFO : merging changes from 993 documents into a model of 34993 documents
2022-10-20 15:16:19,013 : INFO : topic #0 (0.333): 0.066*"rt" + 0.036*"trump" + 0.035*"january6thcommitteehearings" + 0.013*"democracy" + 0.011*"pelosi" + 0.009*"maga" + 0.009*"get" + 0.

<gensim.models.ldamodel.LdaModel at 0x7fb02bfb6b20>

In [34]:
lda2.print_topics()

2022-10-20 15:16:19,027 : INFO : topic #0 (0.333): 0.066*"rt" + 0.036*"trump" + 0.035*"january6thcommitteehearings" + 0.013*"democracy" + 0.011*"pelosi" + 0.009*"maga" + 0.009*"get" + 0.008*"gop" + 0.008*"hours" + 0.008*"come"
2022-10-20 15:16:19,028 : INFO : topic #1 (0.333): 0.090*"rt" + 0.089*"knew" + 0.069*"trump" + 0.049*"responsible" + 0.048*"mcconnell" + 0.048*"called" + 0.048*"mitch" + 0.048*"backed" + 0.048*"mccarthy" + 0.048*"kevin"
2022-10-20 15:16:19,030 : INFO : topic #2 (0.333): 0.079*"trump" + 0.079*"rt" + 0.023*"donald" + 0.019*"january" + 0.018*"voted" + 0.018*"subpoena" + 0.017*"lost" + 0.016*"january6thcommitteehearings" + 0.015*"6th" + 0.015*"coup"


[(0,
  '0.066*"rt" + 0.036*"trump" + 0.035*"january6thcommitteehearings" + 0.013*"democracy" + 0.011*"pelosi" + 0.009*"maga" + 0.009*"get" + 0.008*"gop" + 0.008*"hours" + 0.008*"come"'),
 (1,
  '0.090*"rt" + 0.089*"knew" + 0.069*"trump" + 0.049*"responsible" + 0.048*"mcconnell" + 0.048*"called" + 0.048*"mitch" + 0.048*"backed" + 0.048*"mccarthy" + 0.048*"kevin"'),
 (2,
  '0.079*"trump" + 0.079*"rt" + 0.023*"donald" + 0.019*"january" + 0.018*"voted" + 0.018*"subpoena" + 0.017*"lost" + 0.016*"january6thcommitteehearings" + 0.015*"6th" + 0.015*"coup"')]