### LDA Probabilistic Modeling
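- LDA + CountVectorizer (3 topics)
- LDA + CountVectorizer (4 topics)
- Don't use TF-IDF with LDA: LDA models raw word counts, so fit it on the CountVectorizer output rather than on TF-IDF weights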

In [1]:
import pandas as pd
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize, RegexpTokenizer
from nltk.util import ngrams
nltk.download('stopwords')
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
import re

from gensim import corpora, models, similarities, matutils
import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/jennihawk/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
# Provenance: tweets were collected at 1:15 Pacific Time on 10/14/22.
# The CSV location is kept in one named constant so it is easy to find and change.
TWEETS_CSV_PATH = '/Users/jennihawk/Documents/Data Science/NLP_Unsupervised Learning/Project_NLP/TweetBatch3.csv'

tweets = pd.read_csv(TWEETS_CSV_PATH)
tweets

Unnamed: 0,text,cleaned
0,@ReallyAmerican1 #Roevember and\n#ForThePeople...,roevember and forthepeople and votebluein2022...
1,RT @sandibachom: IS THIS THING ON???!!This is ...,rt is this thing on this is pathetic acting se...
2,RT @sandibachom: IS THIS THING ON???!!This is ...,rt is this thing on this is pathetic acting se...
3,RT @tleehumphrey: Today is the beginning of th...,rt today is the beginning of the inquiry into ...
4,RT @AdamKinzinger: Mitch McConnell.\nKevin McC...,rt mitch mcconnell kevin mccarthy they both kn...
...,...,...
34988,RT @Adrian_Fontes: The January 6th committee j...,rt fontes the january 6th committee just concl...
34989,#January6thCommitteeHearings and everyone runn...,january6thcommitteehearings and everyone runn...
34990,RT @sandibachom: IS THIS THING ON???!!This is ...,rt is this thing on this is pathetic acting se...
34991,So they are gonna subpoena Trump I am guessing...,so they are gonna subpoena trump am guessing t...


### Remove stop words from cleaned column

In [3]:
# Build the stop-word list: NLTK's English list plus corpus-specific tokens
# (e.g. the 'rt' retweet marker left over in the cleaned text).
stop_words = stopwords.words('english')
stop_words.extend(['rt', 'january', 'january6thcommitteehearings', 'ja'])

# Testing membership against a list scans the whole list for every token;
# a set makes each lookup constant-time. `stop_words` itself stays a list.
stop_word_set = set(stop_words)

# Drop stop words from every cleaned tweet and re-join the survivors with single spaces.
# fillna('') guards against rows whose cleaned text is missing (pandas reads empty
# CSV fields as NaN), which would otherwise raise AttributeError on .split().
tweets['cleaned'] = tweets['cleaned'].fillna('').apply(
    lambda text: ' '.join(word for word in text.split() if word not in stop_word_set)
)

In [4]:
#tweets.head()

### Tokenize Words in Cleaned Column

In [5]:
# Tokenize each cleaned tweet into a list of word tokens.
# Series.apply hands each string straight to the tokenizer, avoiding the per-row
# Series construction that DataFrame.apply(axis=1) performs for a single-column operation.
tweets['tokenized'] = tweets['cleaned'].apply(nltk.word_tokenize)

In [6]:
# Preview the first rows to check the stop-word-free `cleaned` text and the `tokenized` column.
tweets.head()

Unnamed: 0,text,cleaned,tokenized
0,@ReallyAmerican1 #Roevember and\n#ForThePeople...,roevember forthepeople votebluein2022 standwit...,"[roevember, forthepeople, votebluein2022, stan..."
1,RT @sandibachom: IS THIS THING ON???!!This is ...,thing pathetic acting sec defense chris miller...,"[thing, pathetic, acting, sec, defense, chris,..."
2,RT @sandibachom: IS THIS THING ON???!!This is ...,thing pathetic acting sec defense chris miller...,"[thing, pathetic, acting, sec, defense, chris,..."
3,RT @tleehumphrey: Today is the beginning of th...,today beginning inquiry trudeau gov use emerge...,"[today, beginning, inquiry, trudeau, gov, use,..."
4,RT @AdamKinzinger: Mitch McConnell.\nKevin McC...,mitch mcconnell kevin mccarthy knew trump resp...,"[mitch, mcconnell, kevin, mccarthy, knew, trum..."


## Countvectorizer + LDA 

### Countvectorizer
- A method to convert text to numerical data (word counts per document).
- By default, CountVectorizer converts the text to lowercase and uses word-level tokenization.
- CountVectorizer takes a list of strings (one per document). If using the `tokenized` column instead, a for loop or list comprehension will help handle the list of lists (e.g. by joining each token list back into a single string).
- CountVectorizer performs word tokenization, not other types of tokenization.

In [7]:
# One raw string per tweet, in DataFrame order, as input for CountVectorizer.
corpus = tweets['cleaned'].tolist()

In [8]:
# Turn the corpus into a document-term matrix (rows = tweets, columns = vocabulary terms).
vectorizer = CountVectorizer()
doc_term = vectorizer.fit_transform(corpus)

# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# prefer get_feature_names_out() and fall back only on older releases.
if hasattr(vectorizer, 'get_feature_names_out'):
    feature_names = vectorizer.get_feature_names_out()
else:
    feature_names = vectorizer.get_feature_names()

# NOTE: .toarray() densifies the sparse count matrix, which is memory-heavy for a
# large corpus; the sparse `doc_term` is what the LDA step consumes.
doc_term_df = pd.DataFrame(doc_term.toarray(), columns=feature_names)



In [9]:
# (number of documents, vocabulary size) of the dense document-term frame.
doc_term_df.shape

(34993, 9400)

In [10]:
#doc_term_df.iloc[:, : 100]

### LDA: Reduce Dimensionality 
- Probabilistic Modeling using gensim
- Increase the number of passes to get more stable results.

#### Fit LDA Model - 4 Topics

In [11]:
# Fit a 4-topic gensim LDA model on the count matrix and keep it as `lda`.
# passes=10 means the corpus is scanned 10 times during training.
# NOTE: this may take a few minutes to run; the INFO log shows progress while you wait.

# Sparse2Corpus reads documents from columns by default, so flip the
# document-term matrix into term-document orientation first.
term_doc = doc_term.T
corpus = matutils.Sparse2Corpus(term_doc)

# CountVectorizer stores term -> column index; gensim wants index -> term.
id2word = {index: term for term, index in vectorizer.vocabulary_.items()}

lda = models.LdaModel(
    corpus=corpus,
    id2word=id2word,
    num_topics=4,
    passes=10,
    random_state=100,
)
lda

2022-10-26 10:53:13,961 : INFO : using symmetric alpha at 0.25
2022-10-26 10:53:13,962 : INFO : using symmetric eta at 0.25
2022-10-26 10:53:13,965 : INFO : using serial LDA version on this node
2022-10-26 10:53:13,970 : INFO : running online (multi-pass) LDA training, 4 topics, 10 passes over the supplied corpus of 34993 documents, updating model once every 2000 documents, evaluating perplexity every 20000 documents, iterating 50x with a convergence threshold of 0.001000
2022-10-26 10:53:13,981 : INFO : PROGRESS: pass 0, at document #2000/34993
2022-10-26 10:53:15,051 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:53:15,056 : INFO : topic #0 (0.250): 0.043*"knew" + 0.029*"pelosi" + 0.026*"trump" + 0.025*"would" + 0.025*"nancy" + 0.022*"th" + 0.022*"protests" + 0.022*"film" + 0.022*"advance" + 0.022*"daughter"
2022-10-26 10:53:15,058 : INFO : topic #1 (0.250): 0.081*"trump" + 0.050*"knew" + 0.048*"mitch" + 0.048*"responsible" + 0.048*"backed"

2022-10-26 10:53:17,274 : INFO : topic diff=0.361766, rho=0.377964
2022-10-26 10:53:17,284 : INFO : PROGRESS: pass 0, at document #16000/34993
2022-10-26 10:53:17,592 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:53:17,596 : INFO : topic #0 (0.250): 0.030*"knew" + 0.016*"amp" + 0.013*"pelosi" + 0.012*"trump" + 0.012*"today" + 0.010*"like" + 0.009*"day" + 0.009*"going" + 0.008*"still" + 0.008*"nancy"
2022-10-26 10:53:17,597 : INFO : topic #1 (0.250): 0.093*"trump" + 0.086*"knew" + 0.070*"called" + 0.069*"responsible" + 0.069*"mcconnell" + 0.069*"mccarthy" + 0.069*"kevin" + 0.069*"mitch" + 0.068*"backed" + 0.013*"voted"
2022-10-26 10:53:17,598 : INFO : topic #2 (0.250): 0.039*"maga" + 0.036*"democracy" + 0.036*"country" + 0.035*"gop" + 0.033*"violence" + 0.032*"without" + 0.031*"love" + 0.031*"january6thcomm" + 0.030*"mark" + 0.030*"overthrowing"
2022-10-26 10:53:17,599 : INFO : topic #3 (0.250): 0.096*"trump" + 0.018*"decided" + 0.018*"lost" 

2022-10-26 10:53:20,390 : INFO : topic #1 (0.250): 0.098*"trump" + 0.083*"knew" + 0.063*"responsible" + 0.063*"backed" + 0.063*"mcconnell" + 0.062*"called" + 0.062*"mitch" + 0.062*"mccarthy" + 0.062*"kevin" + 0.023*"voted"
2022-10-26 10:53:20,390 : INFO : topic #2 (0.250): 0.031*"trump" + 0.021*"maga" + 0.020*"thing" + 0.019*"chris" + 0.019*"country" + 0.018*"national" + 0.018*"gop" + 0.018*"miller" + 0.018*"person" + 0.018*"acting"
2022-10-26 10:53:20,391 : INFO : topic #3 (0.250): 0.102*"trump" + 0.022*"lost" + 0.021*"decided" + 0.020*"video" + 0.018*"coup" + 0.018*"help" + 0.018*"new" + 0.017*"roger" + 0.017*"stone" + 0.016*"yet"
2022-10-26 10:53:20,392 : INFO : topic diff=0.241935, rho=0.258199
2022-10-26 10:53:20,402 : INFO : PROGRESS: pass 0, at document #32000/34993
2022-10-26 10:53:20,688 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:53:20,691 : INFO : topic #0 (0.250): 0.030*"knew" + 0.013*"amp" + 0.011*"pelosi" + 0.010*"trump" + 0.

2022-10-26 10:53:22,638 : INFO : topic #3 (0.250): 0.099*"trump" + 0.018*"decided" + 0.017*"video" + 0.017*"pelosi" + 0.016*"6th" + 0.015*"lost" + 0.014*"attack" + 0.013*"help" + 0.013*"coup" + 0.012*"stone"
2022-10-26 10:53:22,639 : INFO : topic diff=0.235611, rho=0.226476
2022-10-26 10:53:22,649 : INFO : PROGRESS: pass 1, at document #10000/34993
2022-10-26 10:53:22,916 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:53:22,919 : INFO : topic #0 (0.250): 0.031*"knew" + 0.014*"pelosi" + 0.014*"amp" + 0.011*"would" + 0.010*"trump" + 0.010*"like" + 0.009*"nancy" + 0.009*"still" + 0.007*"fbi" + 0.007*"held"
2022-10-26 10:53:22,920 : INFO : topic #1 (0.250): 0.086*"trump" + 0.075*"knew" + 0.064*"responsible" + 0.064*"called" + 0.063*"mcconnell" + 0.063*"mccarthy" + 0.063*"mitch" + 0.063*"kevin" + 0.063*"backed" + 0.024*"chair"
2022-10-26 10:53:22,922 : INFO : topic #2 (0.250): 0.039*"maga" + 0.035*"country" + 0.035*"democracy" + 0.034*"without" + 

2022-10-26 10:53:25,793 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:53:25,796 : INFO : topic #0 (0.250): 0.026*"knew" + 0.014*"pelosi" + 0.013*"amp" + 0.010*"trump" + 0.009*"like" + 0.009*"today" + 0.007*"going" + 0.007*"still" + 0.007*"jan" + 0.007*"held"
2022-10-26 10:53:25,797 : INFO : topic #1 (0.250): 0.098*"trump" + 0.091*"knew" + 0.068*"called" + 0.068*"mcconnell" + 0.068*"responsible" + 0.067*"mccarthy" + 0.067*"mitch" + 0.067*"kevin" + 0.067*"backed" + 0.019*"subpoena"
2022-10-26 10:53:25,798 : INFO : topic #2 (0.250): 0.031*"maga" + 0.027*"country" + 0.026*"democracy" + 0.026*"trump" + 0.025*"violence" + 0.023*"love" + 0.023*"without" + 0.023*"gop" + 0.023*"january6thcomm" + 0.022*"mark"
2022-10-26 10:53:25,799 : INFO : topic #3 (0.250): 0.106*"trump" + 0.024*"decided" + 0.021*"lost" + 0.018*"video" + 0.017*"help" + 0.017*"new" + 0.017*"coup" + 0.017*"roger" + 0.017*"stone" + 0.015*"yet"
2022-10-26 10:53:25,800 : INFO : topic dif

2022-10-26 10:53:28,123 : INFO : topic #2 (0.250): 0.030*"trump" + 0.025*"maga" + 0.021*"country" + 0.021*"love" + 0.020*"democracy" + 0.020*"violence" + 0.019*"without" + 0.019*"gop" + 0.019*"january6thcomm" + 0.019*"thing"
2022-10-26 10:53:28,124 : INFO : topic #3 (0.250): 0.107*"trump" + 0.020*"decided" + 0.019*"lost" + 0.018*"attack" + 0.017*"video" + 0.016*"help" + 0.016*"coup" + 0.016*"stone" + 0.015*"new" + 0.015*"roger"
2022-10-26 10:53:28,124 : INFO : topic diff=0.215515, rho=0.220882
2022-10-26 10:53:28,134 : INFO : PROGRESS: pass 2, at document #4000/34993
2022-10-26 10:53:28,435 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:53:28,438 : INFO : topic #0 (0.250): 0.032*"knew" + 0.016*"pelosi" + 0.012*"amp" + 0.011*"nancy" + 0.011*"would" + 0.009*"trump" + 0.008*"still" + 0.008*"like" + 0.007*"today" + 0.007*"going"
2022-10-26 10:53:28,439 : INFO : topic #1 (0.250): 0.093*"trump" + 0.078*"knew" + 0.063*"responsible" + 0.063*"called" 

2022-10-26 10:53:30,208 : INFO : topic diff=0.162765, rho=0.220882
2022-10-26 10:53:30,218 : INFO : PROGRESS: pass 2, at document #18000/34993
2022-10-26 10:53:30,513 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:53:30,517 : INFO : topic #0 (0.250): 0.026*"knew" + 0.013*"amp" + 0.013*"pelosi" + 0.010*"trump" + 0.009*"like" + 0.009*"held" + 0.008*"today" + 0.008*"still" + 0.008*"nancy" + 0.008*"would"
2022-10-26 10:53:30,518 : INFO : topic #1 (0.250): 0.095*"trump" + 0.089*"knew" + 0.069*"called" + 0.068*"responsible" + 0.068*"mcconnell" + 0.068*"mccarthy" + 0.068*"mitch" + 0.068*"kevin" + 0.067*"backed" + 0.016*"subpoena"
2022-10-26 10:53:30,519 : INFO : topic #2 (0.250): 0.038*"maga" + 0.034*"country" + 0.033*"democracy" + 0.031*"violence" + 0.031*"without" + 0.030*"love" + 0.030*"gop" + 0.029*"january6thcomm" + 0.028*"mark" + 0.028*"overthrowing"
2022-10-26 10:53:30,520 : INFO : topic #3 (0.250): 0.104*"trump" + 0.021*"decided" + 0.020*"lo

2022-10-26 10:53:33,207 : INFO : topic #1 (0.250): 0.100*"trump" + 0.088*"knew" + 0.064*"responsible" + 0.064*"backed" + 0.064*"mcconnell" + 0.064*"called" + 0.064*"mitch" + 0.064*"mccarthy" + 0.064*"kevin" + 0.023*"subpoena"
2022-10-26 10:53:33,208 : INFO : topic #2 (0.250): 0.033*"trump" + 0.022*"maga" + 0.021*"thing" + 0.021*"chris" + 0.020*"national" + 0.019*"country" + 0.019*"miller" + 0.019*"person" + 0.019*"acting" + 0.019*"sec"
2022-10-26 10:53:33,209 : INFO : topic #3 (0.250): 0.112*"trump" + 0.023*"lost" + 0.022*"decided" + 0.020*"video" + 0.019*"new" + 0.019*"coup" + 0.019*"help" + 0.018*"stone" + 0.018*"roger" + 0.017*"attack"
2022-10-26 10:53:33,209 : INFO : topic diff=0.154144, rho=0.220882
2022-10-26 10:53:33,219 : INFO : PROGRESS: pass 2, at document #34000/34993
2022-10-26 10:53:33,511 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:53:33,515 : INFO : topic #0 (0.250): 0.032*"knew" + 0.012*"amp" + 0.011*"trump" + 0.010*"pelosi

2022-10-26 10:53:35,494 : INFO : topic #3 (0.250): 0.108*"trump" + 0.021*"decided" + 0.019*"video" + 0.017*"lost" + 0.017*"6th" + 0.016*"pelosi" + 0.016*"attack" + 0.015*"help" + 0.015*"coup" + 0.014*"stone"
2022-10-26 10:53:35,495 : INFO : topic diff=0.145161, rho=0.215683
2022-10-26 10:53:35,504 : INFO : PROGRESS: pass 3, at document #12000/34993
2022-10-26 10:53:35,787 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:53:35,791 : INFO : topic #0 (0.250): 0.025*"knew" + 0.013*"pelosi" + 0.012*"amp" + 0.012*"trump" + 0.009*"would" + 0.009*"nancy" + 0.008*"still" + 0.008*"like" + 0.007*"today" + 0.007*"held"
2022-10-26 10:53:35,792 : INFO : topic #1 (0.250): 0.089*"trump" + 0.078*"knew" + 0.066*"called" + 0.066*"responsible" + 0.066*"mcconnell" + 0.066*"mccarthy" + 0.065*"kevin" + 0.065*"mitch" + 0.065*"backed" + 0.019*"chair"
2022-10-26 10:53:35,793 : INFO : topic #2 (0.250): 0.039*"maga" + 0.036*"country" + 0.036*"democracy" + 0.033*"without" 

2022-10-26 10:53:38,383 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:53:38,387 : INFO : topic #0 (0.250): 0.024*"knew" + 0.015*"trump" + 0.014*"pelosi" + 0.011*"amp" + 0.008*"like" + 0.008*"today" + 0.007*"still" + 0.006*"going" + 0.006*"jan" + 0.006*"held"
2022-10-26 10:53:38,388 : INFO : topic #1 (0.250): 0.099*"trump" + 0.090*"knew" + 0.067*"called" + 0.066*"mcconnell" + 0.066*"responsible" + 0.066*"mccarthy" + 0.066*"mitch" + 0.066*"kevin" + 0.066*"backed" + 0.021*"subpoena"
2022-10-26 10:53:38,389 : INFO : topic #2 (0.250): 0.027*"maga" + 0.027*"trump" + 0.024*"country" + 0.023*"democracy" + 0.022*"violence" + 0.021*"love" + 0.021*"without" + 0.020*"gop" + 0.020*"january6thcomm" + 0.019*"mark"
2022-10-26 10:53:38,391 : INFO : topic #3 (0.250): 0.108*"trump" + 0.025*"decided" + 0.024*"lost" + 0.020*"video" + 0.020*"new" + 0.019*"help" + 0.019*"roger" + 0.019*"coup" + 0.019*"stone" + 0.017*"yet"
2022-10-26 10:53:38,392 : INFO : topic dif

2022-10-26 10:53:40,755 : INFO : topic #2 (0.250): 0.030*"maga" + 0.026*"country" + 0.026*"trump" + 0.025*"love" + 0.025*"without" + 0.025*"democracy" + 0.024*"violence" + 0.023*"january6thcomm" + 0.023*"gop" + 0.022*"mark"
2022-10-26 10:53:40,757 : INFO : topic #3 (0.250): 0.108*"trump" + 0.021*"decided" + 0.019*"lost" + 0.019*"video" + 0.018*"attack" + 0.017*"6th" + 0.017*"coup" + 0.016*"help" + 0.016*"stone" + 0.015*"roger"
2022-10-26 10:53:40,758 : INFO : topic diff=0.171588, rho=0.210835
2022-10-26 10:53:40,768 : INFO : PROGRESS: pass 4, at document #6000/34993
2022-10-26 10:53:41,041 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:53:41,044 : INFO : topic #0 (0.250): 0.028*"knew" + 0.015*"pelosi" + 0.013*"trump" + 0.011*"amp" + 0.010*"would" + 0.010*"nancy" + 0.008*"like" + 0.008*"still" + 0.007*"today" + 0.006*"fbi"
2022-10-26 10:53:41,046 : INFO : topic #1 (0.250): 0.089*"trump" + 0.078*"knew" + 0.064*"called" + 0.064*"responsible" + 0

2022-10-26 10:53:42,767 : INFO : topic diff=0.167912, rho=0.210835
2022-10-26 10:53:43,288 : INFO : -5.978 per-word bound, 63.0 perplexity estimate based on a held-out corpus of 2000 documents with 20829 words
2022-10-26 10:53:43,288 : INFO : PROGRESS: pass 4, at document #20000/34993
2022-10-26 10:53:43,560 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:53:43,564 : INFO : topic #0 (0.250): 0.023*"knew" + 0.015*"trump" + 0.014*"pelosi" + 0.012*"amp" + 0.008*"today" + 0.008*"like" + 0.007*"nancy" + 0.007*"held" + 0.007*"still" + 0.006*"going"
2022-10-26 10:53:43,565 : INFO : topic #1 (0.250): 0.096*"trump" + 0.090*"knew" + 0.068*"called" + 0.068*"responsible" + 0.067*"mcconnell" + 0.067*"mccarthy" + 0.067*"mitch" + 0.067*"kevin" + 0.067*"backed" + 0.018*"subpoena"
2022-10-26 10:53:43,566 : INFO : topic #2 (0.250): 0.037*"maga" + 0.032*"country" + 0.032*"democracy" + 0.030*"violence" + 0.029*"love" + 0.029*"without" + 0.028*"gop" + 0.028*"janua

2022-10-26 10:53:45,671 : INFO : topic #1 (0.250): 0.100*"trump" + 0.086*"knew" + 0.064*"responsible" + 0.064*"mcconnell" + 0.064*"called" + 0.064*"mitch" + 0.064*"backed" + 0.064*"mccarthy" + 0.064*"kevin" + 0.024*"subpoena"
2022-10-26 10:53:45,672 : INFO : topic #2 (0.250): 0.034*"trump" + 0.022*"thing" + 0.022*"chris" + 0.021*"maga" + 0.021*"national" + 0.020*"miller" + 0.020*"person" + 0.020*"acting" + 0.020*"sec" + 0.020*"defense"
2022-10-26 10:53:45,673 : INFO : topic #3 (0.250): 0.109*"trump" + 0.022*"lost" + 0.022*"decided" + 0.019*"help" + 0.019*"video" + 0.019*"attack" + 0.018*"coup" + 0.018*"new" + 0.017*"stone" + 0.017*"roger"
2022-10-26 10:53:45,674 : INFO : topic diff=0.128186, rho=0.210835
2022-10-26 10:53:45,930 : INFO : -5.788 per-word bound, 55.3 perplexity estimate based on a held-out corpus of 993 documents with 9868 words
2022-10-26 10:53:45,931 : INFO : PROGRESS: pass 4, at document #34993/34993
2022-10-26 10:53:46,063 : INFO : merging changes from 993 documents i

2022-10-26 10:53:47,793 : INFO : topic #3 (0.250): 0.106*"trump" + 0.022*"decided" + 0.021*"video" + 0.018*"lost" + 0.017*"6th" + 0.016*"attack" + 0.016*"help" + 0.016*"pelosi" + 0.015*"coup" + 0.015*"stone"
2022-10-26 10:53:47,794 : INFO : topic diff=0.154149, rho=0.206300
2022-10-26 10:53:47,804 : INFO : PROGRESS: pass 5, at document #14000/34993
2022-10-26 10:53:48,077 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:53:48,081 : INFO : topic #0 (0.250): 0.024*"knew" + 0.015*"trump" + 0.013*"pelosi" + 0.012*"amp" + 0.008*"nancy" + 0.008*"still" + 0.007*"like" + 0.007*"held" + 0.007*"would" + 0.007*"today"
2022-10-26 10:53:48,082 : INFO : topic #1 (0.250): 0.091*"trump" + 0.083*"knew" + 0.067*"called" + 0.066*"responsible" + 0.066*"mcconnell" + 0.066*"mccarthy" + 0.066*"kevin" + 0.066*"mitch" + 0.066*"backed" + 0.016*"subpoena"
2022-10-26 10:53:48,083 : INFO : topic #2 (0.250): 0.038*"maga" + 0.035*"country" + 0.034*"democracy" + 0.032*"withou

2022-10-26 10:53:50,625 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:53:50,629 : INFO : topic #0 (0.250): 0.023*"knew" + 0.015*"trump" + 0.013*"pelosi" + 0.011*"amp" + 0.008*"like" + 0.007*"today" + 0.007*"still" + 0.005*"fbi" + 0.005*"going" + 0.005*"jan"
2022-10-26 10:53:50,630 : INFO : topic #1 (0.250): 0.099*"trump" + 0.089*"knew" + 0.066*"called" + 0.065*"responsible" + 0.065*"mcconnell" + 0.065*"mccarthy" + 0.065*"mitch" + 0.065*"kevin" + 0.065*"backed" + 0.022*"subpoena"
2022-10-26 10:53:50,631 : INFO : topic #2 (0.250): 0.029*"trump" + 0.025*"maga" + 0.022*"country" + 0.021*"democracy" + 0.020*"violence" + 0.019*"love" + 0.019*"thing" + 0.019*"without" + 0.019*"national" + 0.019*"chris"
2022-10-26 10:53:50,633 : INFO : topic #3 (0.250): 0.109*"trump" + 0.025*"decided" + 0.025*"lost" + 0.023*"video" + 0.021*"help" + 0.020*"new" + 0.020*"roger" + 0.020*"coup" + 0.020*"stone" + 0.018*"yet"
2022-10-26 10:53:50,634 : INFO : topic diff=0.

2022-10-26 10:53:52,793 : INFO : topic #2 (0.250): 0.034*"maga" + 0.030*"country" + 0.030*"democracy" + 0.029*"without" + 0.029*"violence" + 0.029*"love" + 0.027*"gop" + 0.027*"january6thcomm" + 0.026*"mark" + 0.026*"overthrowing"
2022-10-26 10:53:52,794 : INFO : topic #3 (0.250): 0.109*"trump" + 0.021*"decided" + 0.019*"video" + 0.019*"lost" + 0.018*"6th" + 0.017*"attack" + 0.016*"coup" + 0.016*"help" + 0.015*"stone" + 0.015*"roger"
2022-10-26 10:53:52,795 : INFO : topic diff=0.191271, rho=0.202045
2022-10-26 10:53:52,806 : INFO : PROGRESS: pass 6, at document #8000/34993
2022-10-26 10:53:53,070 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:53:53,073 : INFO : topic #0 (0.250): 0.027*"knew" + 0.015*"pelosi" + 0.014*"trump" + 0.010*"nancy" + 0.010*"would" + 0.010*"amp" + 0.008*"still" + 0.008*"like" + 0.006*"fbi" + 0.006*"held"
2022-10-26 10:53:53,075 : INFO : topic #1 (0.250): 0.088*"trump" + 0.079*"knew" + 0.066*"responsible" + 0.066*"calle

2022-10-26 10:53:55,401 : INFO : topic diff=0.159544, rho=0.202045
2022-10-26 10:53:55,412 : INFO : PROGRESS: pass 6, at document #22000/34993
2022-10-26 10:53:55,682 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:53:55,686 : INFO : topic #0 (0.250): 0.022*"knew" + 0.017*"trump" + 0.014*"pelosi" + 0.012*"amp" + 0.009*"today" + 0.008*"like" + 0.007*"nancy" + 0.007*"going" + 0.007*"held" + 0.006*"still"
2022-10-26 10:53:55,687 : INFO : topic #1 (0.250): 0.097*"trump" + 0.090*"knew" + 0.069*"called" + 0.069*"responsible" + 0.068*"mcconnell" + 0.068*"mccarthy" + 0.068*"mitch" + 0.068*"kevin" + 0.068*"backed" + 0.019*"subpoena"
2022-10-26 10:53:55,688 : INFO : topic #2 (0.250): 0.033*"maga" + 0.029*"country" + 0.029*"democracy" + 0.027*"violence" + 0.026*"without" + 0.026*"love" + 0.025*"gop" + 0.025*"january6thcomm" + 0.024*"mark" + 0.024*"trump"
2022-10-26 10:53:55,690 : INFO : topic #3 (0.250): 0.108*"trump" + 0.026*"decided" + 0.023*"lost" + 0

2022-10-26 10:53:57,850 : INFO : topic #1 (0.250): 0.099*"trump" + 0.083*"knew" + 0.064*"responsible" + 0.063*"mcconnell" + 0.063*"called" + 0.063*"mitch" + 0.063*"backed" + 0.063*"mccarthy" + 0.063*"kevin" + 0.025*"subpoena"
2022-10-26 10:53:57,851 : INFO : topic #2 (0.250): 0.034*"trump" + 0.022*"chris" + 0.022*"maga" + 0.022*"thing" + 0.021*"national" + 0.020*"miller" + 0.020*"person" + 0.020*"sec" + 0.020*"acting" + 0.020*"defense"
2022-10-26 10:53:57,853 : INFO : topic #3 (0.250): 0.112*"trump" + 0.022*"lost" + 0.021*"decided" + 0.020*"attack" + 0.018*"coup" + 0.018*"help" + 0.018*"video" + 0.017*"new" + 0.017*"6th" + 0.017*"stone"
2022-10-26 10:53:57,853 : INFO : topic diff=0.108032, rho=0.202045
2022-10-26 10:53:57,863 : INFO : PROGRESS: pass 7, at document #2000/34993
2022-10-26 10:53:58,161 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:53:58,164 : INFO : topic #0 (0.250): 0.026*"knew" + 0.016*"trump" + 0.013*"pelosi" + 0.010*"amp" +

2022-10-26 10:53:59,874 : INFO : topic #3 (0.250): 0.107*"trump" + 0.022*"decided" + 0.020*"video" + 0.020*"lost" + 0.017*"help" + 0.016*"6th" + 0.016*"attack" + 0.016*"coup" + 0.016*"stone" + 0.016*"roger"
2022-10-26 10:53:59,876 : INFO : topic diff=0.144533, rho=0.198043
2022-10-26 10:53:59,885 : INFO : PROGRESS: pass 7, at document #16000/34993
2022-10-26 10:54:00,150 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:54:00,153 : INFO : topic #0 (0.250): 0.023*"knew" + 0.015*"trump" + 0.014*"pelosi" + 0.012*"amp" + 0.009*"nancy" + 0.008*"held" + 0.008*"today" + 0.007*"like" + 0.007*"still" + 0.007*"would"
2022-10-26 10:54:00,154 : INFO : topic #1 (0.250): 0.093*"trump" + 0.088*"knew" + 0.070*"called" + 0.069*"responsible" + 0.069*"mcconnell" + 0.069*"mccarthy" + 0.069*"kevin" + 0.069*"mitch" + 0.068*"backed" + 0.015*"subpoena"
2022-10-26 10:54:00,155 : INFO : topic #2 (0.250): 0.039*"maga" + 0.035*"country" + 0.035*"democracy" + 0.032*"without

2022-10-26 10:54:02,684 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:54:02,688 : INFO : topic #0 (0.250): 0.021*"knew" + 0.015*"trump" + 0.012*"pelosi" + 0.011*"amp" + 0.008*"like" + 0.007*"today" + 0.006*"still" + 0.006*"going" + 0.005*"jan" + 0.005*"fbi"
2022-10-26 10:54:02,689 : INFO : topic #1 (0.250): 0.099*"trump" + 0.088*"knew" + 0.065*"responsible" + 0.065*"called" + 0.065*"backed" + 0.065*"mcconnell" + 0.064*"mccarthy" + 0.064*"mitch" + 0.064*"kevin" + 0.023*"subpoena"
2022-10-26 10:54:02,690 : INFO : topic #2 (0.250): 0.030*"trump" + 0.024*"maga" + 0.022*"country" + 0.020*"democracy" + 0.020*"thing" + 0.020*"chris" + 0.019*"violence" + 0.019*"national" + 0.019*"love" + 0.019*"without"
2022-10-26 10:54:02,692 : INFO : topic #3 (0.250): 0.110*"trump" + 0.025*"lost" + 0.024*"decided" + 0.022*"video" + 0.021*"coup" + 0.020*"help" + 0.020*"new" + 0.020*"roger" + 0.020*"stone" + 0.018*"yet"
2022-10-26 10:54:02,693 : INFO : topic diff=0.

2022-10-26 10:54:04,914 : INFO : topic #2 (0.250): 0.037*"maga" + 0.034*"country" + 0.033*"democracy" + 0.033*"love" + 0.032*"without" + 0.032*"violence" + 0.030*"gop" + 0.030*"january6thcomm" + 0.029*"mark" + 0.029*"overthrowing"
2022-10-26 10:54:04,916 : INFO : topic #3 (0.250): 0.109*"trump" + 0.022*"decided" + 0.020*"video" + 0.018*"6th" + 0.018*"lost" + 0.017*"attack" + 0.016*"help" + 0.016*"pelosi" + 0.015*"coup" + 0.015*"stone"
2022-10-26 10:54:04,916 : INFO : topic diff=0.160972, rho=0.194270
2022-10-26 10:54:04,926 : INFO : PROGRESS: pass 8, at document #10000/34993
2022-10-26 10:54:05,183 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:54:05,187 : INFO : topic #0 (0.250): 0.025*"knew" + 0.015*"trump" + 0.014*"pelosi" + 0.010*"amp" + 0.009*"nancy" + 0.009*"would" + 0.008*"like" + 0.008*"still" + 0.006*"held" + 0.006*"fbi"
2022-10-26 10:54:05,188 : INFO : topic #1 (0.250): 0.087*"trump" + 0.078*"knew" + 0.065*"responsible" + 0.065*"cal

2022-10-26 10:54:07,479 : INFO : topic diff=0.142106, rho=0.194270
2022-10-26 10:54:07,490 : INFO : PROGRESS: pass 8, at document #24000/34993
2022-10-26 10:54:07,762 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:54:07,766 : INFO : topic #0 (0.250): 0.023*"knew" + 0.017*"trump" + 0.014*"pelosi" + 0.011*"amp" + 0.008*"like" + 0.008*"today" + 0.006*"nancy" + 0.006*"going" + 0.006*"held" + 0.006*"still"
2022-10-26 10:54:07,767 : INFO : topic #1 (0.250): 0.098*"trump" + 0.092*"knew" + 0.069*"called" + 0.068*"mcconnell" + 0.068*"responsible" + 0.068*"mccarthy" + 0.068*"mitch" + 0.068*"kevin" + 0.068*"backed" + 0.020*"subpoena"
2022-10-26 10:54:07,768 : INFO : topic #2 (0.250): 0.032*"maga" + 0.028*"country" + 0.027*"democracy" + 0.026*"violence" + 0.025*"trump" + 0.025*"love" + 0.025*"without" + 0.024*"gop" + 0.024*"january6thcomm" + 0.023*"mark"
2022-10-26 10:54:07,770 : INFO : topic #3 (0.250): 0.109*"trump" + 0.026*"decided" + 0.023*"lost" + 0

2022-10-26 10:54:09,932 : INFO : topic #1 (0.250): 0.096*"trump" + 0.083*"knew" + 0.065*"responsible" + 0.064*"mcconnell" + 0.064*"called" + 0.064*"mitch" + 0.064*"backed" + 0.064*"mccarthy" + 0.064*"kevin" + 0.022*"subpoena"
2022-10-26 10:54:09,933 : INFO : topic #2 (0.250): 0.030*"trump" + 0.026*"maga" + 0.022*"country" + 0.021*"love" + 0.021*"democracy" + 0.020*"violence" + 0.020*"without" + 0.019*"thing" + 0.019*"january6thcomm" + 0.019*"gop"
2022-10-26 10:54:09,935 : INFO : topic #3 (0.250): 0.111*"trump" + 0.022*"decided" + 0.021*"lost" + 0.019*"attack" + 0.019*"video" + 0.018*"help" + 0.018*"coup" + 0.017*"stone" + 0.017*"new" + 0.017*"roger"
2022-10-26 10:54:09,935 : INFO : topic diff=0.164985, rho=0.190705
2022-10-26 10:54:09,945 : INFO : PROGRESS: pass 9, at document #4000/34993
2022-10-26 10:54:10,223 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:54:10,227 : INFO : topic #0 (0.250): 0.027*"knew" + 0.016*"pelosi" + 0.015*"trump" + 

2022-10-26 10:54:11,930 : INFO : topic #3 (0.250): 0.108*"trump" + 0.022*"decided" + 0.021*"lost" + 0.020*"video" + 0.017*"6th" + 0.017*"help" + 0.016*"stone" + 0.016*"coup" + 0.016*"roger" + 0.016*"attack"
2022-10-26 10:54:11,931 : INFO : topic diff=0.127641, rho=0.190705
2022-10-26 10:54:11,941 : INFO : PROGRESS: pass 9, at document #18000/34993
2022-10-26 10:54:12,210 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:54:12,213 : INFO : topic #0 (0.250): 0.023*"knew" + 0.015*"trump" + 0.013*"pelosi" + 0.012*"amp" + 0.008*"nancy" + 0.008*"like" + 0.008*"held" + 0.007*"today" + 0.007*"still" + 0.007*"would"
2022-10-26 10:54:12,214 : INFO : topic #1 (0.250): 0.094*"trump" + 0.089*"knew" + 0.070*"called" + 0.069*"responsible" + 0.068*"mcconnell" + 0.068*"mccarthy" + 0.068*"mitch" + 0.068*"kevin" + 0.068*"backed" + 0.017*"subpoena"
2022-10-26 10:54:12,215 : INFO : topic #2 (0.250): 0.038*"maga" + 0.034*"country" + 0.033*"democracy" + 0.031*"violenc

2022-10-26 10:54:14,822 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:54:14,826 : INFO : topic #0 (0.250): 0.024*"knew" + 0.015*"trump" + 0.011*"pelosi" + 0.011*"amp" + 0.008*"like" + 0.007*"today" + 0.007*"still" + 0.006*"going" + 0.005*"fbi" + 0.005*"jan"
2022-10-26 10:54:14,827 : INFO : topic #1 (0.250): 0.100*"trump" + 0.089*"knew" + 0.065*"responsible" + 0.065*"called" + 0.065*"mcconnell" + 0.065*"backed" + 0.065*"mitch" + 0.065*"mccarthy" + 0.064*"kevin" + 0.023*"subpoena"
2022-10-26 10:54:14,829 : INFO : topic #2 (0.250): 0.033*"trump" + 0.023*"maga" + 0.021*"thing" + 0.021*"chris" + 0.020*"country" + 0.020*"national" + 0.019*"democracy" + 0.019*"miller" + 0.019*"person" + 0.019*"acting"
2022-10-26 10:54:14,830 : INFO : topic #3 (0.250): 0.113*"trump" + 0.025*"lost" + 0.024*"decided" + 0.021*"video" + 0.020*"help" + 0.020*"coup" + 0.020*"new" + 0.019*"stone" + 0.019*"roger" + 0.018*"attack"
2022-10-26 10:54:14,831 : INFO : topic diff=

<gensim.models.ldamodel.LdaModel at 0x7f9ca73d86d0>

### How to Optimize Topics:
The results may look fuzzy. To clean them up, you have several options:
- Increase the number of passes to get more stable results.
- Change the number of topics.
- Clean up the text more in the CountVectorizer step, such as adding to the stop word list, removing common words, etc.

Spend a few minutes doing at least one of these things to make your model better before moving on.

In [12]:
# List each topic of the 4-topic model as its highest-weighted words with their weights.
lda.print_topics()

2022-10-26 10:54:15,541 : INFO : topic #0 (0.250): 0.027*"knew" + 0.015*"trump" + 0.011*"pelosi" + 0.010*"amp" + 0.009*"today" + 0.008*"like" + 0.008*"still" + 0.006*"fbi" + 0.006*"going" + 0.006*"security"
2022-10-26 10:54:15,542 : INFO : topic #1 (0.250): 0.099*"trump" + 0.083*"knew" + 0.064*"responsible" + 0.064*"mcconnell" + 0.063*"called" + 0.063*"mitch" + 0.063*"backed" + 0.063*"mccarthy" + 0.063*"kevin" + 0.025*"subpoena"
2022-10-26 10:54:15,543 : INFO : topic #2 (0.250): 0.034*"trump" + 0.022*"maga" + 0.022*"chris" + 0.022*"thing" + 0.021*"national" + 0.020*"miller" + 0.020*"person" + 0.020*"sec" + 0.020*"acting" + 0.019*"defense"
2022-10-26 10:54:15,544 : INFO : topic #3 (0.250): 0.112*"trump" + 0.022*"lost" + 0.021*"decided" + 0.020*"attack" + 0.018*"coup" + 0.018*"help" + 0.018*"video" + 0.017*"new" + 0.017*"stone" + 0.017*"roger"


[(0,
  '0.027*"knew" + 0.015*"trump" + 0.011*"pelosi" + 0.010*"amp" + 0.009*"today" + 0.008*"like" + 0.008*"still" + 0.006*"fbi" + 0.006*"going" + 0.006*"security"'),
 (1,
  '0.099*"trump" + 0.083*"knew" + 0.064*"responsible" + 0.064*"mcconnell" + 0.063*"called" + 0.063*"mitch" + 0.063*"backed" + 0.063*"mccarthy" + 0.063*"kevin" + 0.025*"subpoena"'),
 (2,
  '0.034*"trump" + 0.022*"maga" + 0.022*"chris" + 0.022*"thing" + 0.021*"national" + 0.020*"miller" + 0.020*"person" + 0.020*"sec" + 0.020*"acting" + 0.019*"defense"'),
 (3,
  '0.112*"trump" + 0.022*"lost" + 0.021*"decided" + 0.020*"attack" + 0.018*"coup" + 0.018*"help" + 0.018*"video" + 0.017*"new" + 0.017*"stone" + 0.017*"roger"')]

#### Fit LDA Model - 3 Topics

In [13]:
# Fit a 3-topic gensim LDA model on the same count matrix and keep it as `lda2`.

# Sparse2Corpus is given the term-document orientation of the document-term matrix.
term_doc = doc_term.transpose()
corpus = matutils.Sparse2Corpus(term_doc)

# CountVectorizer stores term -> column index; gensim wants index -> term.
id2word = dict((v, k) for k, v in vectorizer.vocabulary_.items())

# LDA training is stochastic: without a seed the topics change on every run.
# random_state=100 matches the 4-topic fit so both models are reproducible
# and comparable under Restart & Run All.
lda2 = models.LdaModel(corpus=corpus, num_topics=3, id2word=id2word, random_state=100, passes=10)
lda2

2022-10-26 10:54:15,556 : INFO : using symmetric alpha at 0.3333333333333333
2022-10-26 10:54:15,557 : INFO : using symmetric eta at 0.3333333333333333
2022-10-26 10:54:15,559 : INFO : using serial LDA version on this node
2022-10-26 10:54:15,564 : INFO : running online (multi-pass) LDA training, 3 topics, 10 passes over the supplied corpus of 34993 documents, updating model once every 2000 documents, evaluating perplexity every 20000 documents, iterating 50x with a convergence threshold of 0.001000
2022-10-26 10:54:15,573 : INFO : PROGRESS: pass 0, at document #2000/34993
2022-10-26 10:54:16,805 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:54:16,809 : INFO : topic #0 (0.333): 0.073*"trump" + 0.017*"kevin" + 0.016*"mccarthy" + 0.016*"mcconnell" + 0.016*"called" + 0.016*"backed" + 0.016*"knew" + 0.015*"responsible" + 0.014*"believe" + 0.013*"mitch"
2022-10-26 10:54:16,810 : INFO : topic #1 (0.333): 0.031*"trump" + 0.028*"democracy" + 0.023*"

2022-10-26 10:54:19,954 : INFO : topic #2 (0.333): 0.079*"trump" + 0.070*"knew" + 0.050*"called" + 0.049*"responsible" + 0.049*"mcconnell" + 0.049*"mccarthy" + 0.049*"mitch" + 0.049*"kevin" + 0.049*"backed" + 0.012*"election"
2022-10-26 10:54:19,954 : INFO : topic diff=0.361583, rho=0.333333
2022-10-26 10:54:20,526 : INFO : -6.241 per-word bound, 75.7 perplexity estimate based on a held-out corpus of 2000 documents with 20829 words
2022-10-26 10:54:20,526 : INFO : PROGRESS: pass 0, at document #20000/34993
2022-10-26 10:54:20,843 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:54:20,847 : INFO : topic #0 (0.333): 0.085*"trump" + 0.021*"decided" + 0.016*"lost" + 0.014*"help" + 0.014*"people" + 0.014*"new" + 0.013*"video" + 0.013*"roger" + 0.013*"coup" + 0.013*"stone"
2022-10-26 10:54:20,848 : INFO : topic #1 (0.333): 0.035*"democracy" + 0.023*"trump" + 0.022*"maga" + 0.022*"donald" + 0.020*"country" + 0.020*"gop" + 0.019*"voted" + 0.019*"violen

2022-10-26 10:54:23,560 : INFO : topic #2 (0.333): 0.068*"trump" + 0.061*"knew" + 0.043*"responsible" + 0.043*"mcconnell" + 0.043*"called" + 0.042*"mitch" + 0.042*"backed" + 0.042*"mccarthy" + 0.042*"kevin" + 0.010*"pelosi"
2022-10-26 10:54:23,560 : INFO : topic diff=0.212719, rho=0.235702
2022-10-26 10:54:23,570 : INFO : PROGRESS: pass 1, at document #2000/34993
2022-10-26 10:54:23,876 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:54:23,879 : INFO : topic #0 (0.333): 0.086*"trump" + 0.015*"decided" + 0.014*"lost" + 0.013*"attack" + 0.013*"help" + 0.013*"video" + 0.013*"6th" + 0.012*"coup" + 0.012*"new" + 0.012*"stone"
2022-10-26 10:54:23,880 : INFO : topic #1 (0.333): 0.032*"democracy" + 0.026*"trump" + 0.025*"donald" + 0.021*"knew" + 0.021*"voted" + 0.020*"subpoena" + 0.018*"maga" + 0.016*"testify" + 0.016*"history" + 0.016*"gop"
2022-10-26 10:54:23,881 : INFO : topic #2 (0.333): 0.068*"trump" + 0.061*"knew" + 0.043*"responsible" + 0.043*"

2022-10-26 10:54:26,403 : INFO : topic diff=0.258950, rho=0.226476
2022-10-26 10:54:26,943 : INFO : -6.159 per-word bound, 71.4 perplexity estimate based on a held-out corpus of 2000 documents with 20829 words
2022-10-26 10:54:26,944 : INFO : PROGRESS: pass 1, at document #20000/34993
2022-10-26 10:54:27,235 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:54:27,238 : INFO : topic #0 (0.333): 0.083*"trump" + 0.020*"decided" + 0.015*"lost" + 0.014*"video" + 0.013*"roger" + 0.013*"help" + 0.013*"stone" + 0.013*"people" + 0.013*"6th" + 0.013*"new"
2022-10-26 10:54:27,239 : INFO : topic #1 (0.333): 0.035*"democracy" + 0.024*"maga" + 0.022*"trump" + 0.022*"donald" + 0.021*"country" + 0.020*"gop" + 0.020*"violence" + 0.019*"knew" + 0.019*"love" + 0.019*"without"
2022-10-26 10:54:27,239 : INFO : topic #2 (0.333): 0.076*"trump" + 0.064*"knew" + 0.047*"called" + 0.046*"responsible" + 0.046*"mcconnell" + 0.046*"mccarthy" + 0.046*"mitch" + 0.046*"kevin" +

2022-10-26 10:54:29,739 : INFO : topic diff=0.190276, rho=0.226476
2022-10-26 10:54:29,749 : INFO : PROGRESS: pass 2, at document #2000/34993
2022-10-26 10:54:30,045 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:54:30,048 : INFO : topic #0 (0.333): 0.085*"trump" + 0.015*"decided" + 0.014*"lost" + 0.013*"attack" + 0.013*"6th" + 0.013*"video" + 0.013*"help" + 0.012*"coup" + 0.012*"stone" + 0.012*"new"
2022-10-26 10:54:30,050 : INFO : topic #1 (0.333): 0.032*"democracy" + 0.025*"trump" + 0.025*"knew" + 0.025*"donald" + 0.021*"voted" + 0.020*"subpoena" + 0.018*"maga" + 0.016*"testify" + 0.016*"history" + 0.016*"country"
2022-10-26 10:54:30,052 : INFO : topic #2 (0.333): 0.068*"trump" + 0.058*"knew" + 0.042*"responsible" + 0.042*"mcconnell" + 0.042*"called" + 0.042*"mitch" + 0.042*"backed" + 0.042*"mccarthy" + 0.042*"kevin" + 0.013*"pelosi"
2022-10-26 10:54:30,053 : INFO : topic diff=0.285973, rho=0.220882
2022-10-26 10:54:30,063 : INFO : PROGRES

2022-10-26 10:54:32,968 : INFO : PROGRESS: pass 2, at document #20000/34993
2022-10-26 10:54:33,254 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:54:33,257 : INFO : topic #0 (0.333): 0.082*"trump" + 0.019*"decided" + 0.015*"lost" + 0.014*"video" + 0.014*"roger" + 0.013*"stone" + 0.013*"help" + 0.013*"6th" + 0.013*"people" + 0.013*"new"
2022-10-26 10:54:33,258 : INFO : topic #1 (0.333): 0.036*"democracy" + 0.024*"maga" + 0.022*"trump" + 0.022*"donald" + 0.021*"country" + 0.020*"violence" + 0.020*"knew" + 0.019*"gop" + 0.019*"love" + 0.019*"without"
2022-10-26 10:54:33,259 : INFO : topic #2 (0.333): 0.076*"trump" + 0.063*"knew" + 0.046*"called" + 0.046*"responsible" + 0.046*"mcconnell" + 0.046*"mccarthy" + 0.045*"mitch" + 0.045*"kevin" + 0.045*"backed" + 0.012*"pelosi"
2022-10-26 10:54:33,260 : INFO : topic diff=0.252982, rho=0.220882
2022-10-26 10:54:33,270 : INFO : PROGRESS: pass 2, at document #22000/34993
2022-10-26 10:54:33,543 : INFO : m

2022-10-26 10:54:36,215 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:54:36,218 : INFO : topic #0 (0.333): 0.085*"trump" + 0.015*"decided" + 0.014*"lost" + 0.014*"attack" + 0.013*"6th" + 0.013*"video" + 0.013*"help" + 0.012*"stone" + 0.012*"coup" + 0.012*"new"
2022-10-26 10:54:36,219 : INFO : topic #1 (0.333): 0.032*"democracy" + 0.026*"knew" + 0.026*"trump" + 0.025*"donald" + 0.021*"voted" + 0.020*"subpoena" + 0.018*"maga" + 0.017*"testify" + 0.016*"history" + 0.016*"country"
2022-10-26 10:54:36,221 : INFO : topic #2 (0.333): 0.068*"trump" + 0.057*"knew" + 0.042*"responsible" + 0.042*"mcconnell" + 0.042*"called" + 0.042*"mitch" + 0.042*"backed" + 0.042*"mccarthy" + 0.042*"kevin" + 0.013*"pelosi"
2022-10-26 10:54:36,221 : INFO : topic diff=0.273447, rho=0.215683
2022-10-26 10:54:36,231 : INFO : PROGRESS: pass 3, at document #4000/34993
2022-10-26 10:54:36,498 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-1

2022-10-26 10:54:39,413 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:54:39,416 : INFO : topic #0 (0.333): 0.082*"trump" + 0.019*"decided" + 0.015*"lost" + 0.014*"video" + 0.013*"6th" + 0.013*"roger" + 0.013*"stone" + 0.013*"help" + 0.013*"people" + 0.013*"new"
2022-10-26 10:54:39,418 : INFO : topic #1 (0.333): 0.036*"democracy" + 0.024*"maga" + 0.022*"trump" + 0.022*"donald" + 0.021*"country" + 0.020*"violence" + 0.020*"knew" + 0.019*"gop" + 0.019*"love" + 0.019*"without"
2022-10-26 10:54:39,418 : INFO : topic #2 (0.333): 0.076*"trump" + 0.063*"knew" + 0.046*"called" + 0.046*"responsible" + 0.046*"mcconnell" + 0.046*"mccarthy" + 0.045*"mitch" + 0.045*"kevin" + 0.045*"backed" + 0.012*"pelosi"
2022-10-26 10:54:39,419 : INFO : topic diff=0.243086, rho=0.215683
2022-10-26 10:54:39,431 : INFO : PROGRESS: pass 3, at document #22000/34993
2022-10-26 10:54:39,699 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-2

2022-10-26 10:54:42,242 : INFO : topic #0 (0.333): 0.085*"trump" + 0.015*"decided" + 0.014*"lost" + 0.014*"attack" + 0.013*"6th" + 0.013*"video" + 0.013*"help" + 0.012*"stone" + 0.012*"new" + 0.012*"roger"
2022-10-26 10:54:42,242 : INFO : topic #1 (0.333): 0.033*"democracy" + 0.026*"knew" + 0.026*"trump" + 0.025*"donald" + 0.021*"voted" + 0.021*"subpoena" + 0.019*"maga" + 0.017*"testify" + 0.016*"history" + 0.016*"country"
2022-10-26 10:54:42,243 : INFO : topic #2 (0.333): 0.068*"trump" + 0.057*"knew" + 0.042*"responsible" + 0.042*"mcconnell" + 0.042*"called" + 0.042*"mitch" + 0.042*"backed" + 0.042*"mccarthy" + 0.042*"kevin" + 0.013*"pelosi"
2022-10-26 10:54:42,244 : INFO : topic diff=0.264908, rho=0.210835
2022-10-26 10:54:42,254 : INFO : PROGRESS: pass 4, at document #4000/34993
2022-10-26 10:54:42,522 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:54:42,525 : INFO : topic #0 (0.333): 0.082*"trump" + 0.016*"decided" + 0.014*"6th" + 0.013*"

2022-10-26 10:54:45,474 : INFO : topic #0 (0.333): 0.082*"trump" + 0.019*"decided" + 0.015*"lost" + 0.014*"video" + 0.014*"6th" + 0.013*"roger" + 0.013*"stone" + 0.013*"help" + 0.013*"people" + 0.013*"new"
2022-10-26 10:54:45,475 : INFO : topic #1 (0.333): 0.036*"democracy" + 0.024*"maga" + 0.022*"trump" + 0.022*"donald" + 0.022*"country" + 0.020*"violence" + 0.020*"knew" + 0.019*"gop" + 0.019*"love" + 0.019*"without"
2022-10-26 10:54:45,476 : INFO : topic #2 (0.333): 0.076*"trump" + 0.063*"knew" + 0.046*"called" + 0.046*"responsible" + 0.046*"mcconnell" + 0.045*"mccarthy" + 0.045*"mitch" + 0.045*"kevin" + 0.045*"backed" + 0.012*"pelosi"
2022-10-26 10:54:45,477 : INFO : topic diff=0.235778, rho=0.210835
2022-10-26 10:54:45,487 : INFO : PROGRESS: pass 4, at document #22000/34993
2022-10-26 10:54:45,754 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:54:45,756 : INFO : topic #0 (0.333): 0.084*"trump" + 0.021*"decided" + 0.016*"lost" + 0.015*"vid

2022-10-26 10:54:48,214 : INFO : topic #1 (0.333): 0.033*"democracy" + 0.026*"knew" + 0.026*"trump" + 0.025*"donald" + 0.021*"voted" + 0.021*"subpoena" + 0.019*"maga" + 0.017*"testify" + 0.016*"history" + 0.016*"country"
2022-10-26 10:54:48,215 : INFO : topic #2 (0.333): 0.068*"trump" + 0.057*"knew" + 0.042*"responsible" + 0.042*"mcconnell" + 0.042*"called" + 0.042*"mitch" + 0.042*"backed" + 0.042*"mccarthy" + 0.042*"kevin" + 0.013*"pelosi"
2022-10-26 10:54:48,216 : INFO : topic diff=0.257884, rho=0.206300
2022-10-26 10:54:48,227 : INFO : PROGRESS: pass 5, at document #4000/34993
2022-10-26 10:54:48,496 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:54:48,499 : INFO : topic #0 (0.333): 0.082*"trump" + 0.016*"decided" + 0.014*"6th" + 0.013*"lost" + 0.013*"video" + 0.013*"attack" + 0.012*"help" + 0.012*"stone" + 0.011*"roger" + 0.011*"new"
2022-10-26 10:54:48,501 : INFO : topic #1 (0.333): 0.034*"democracy" + 0.023*"knew" + 0.023*"trump" + 0.02

2022-10-26 10:54:51,387 : INFO : topic #1 (0.333): 0.036*"democracy" + 0.024*"maga" + 0.022*"trump" + 0.022*"donald" + 0.022*"country" + 0.020*"violence" + 0.020*"knew" + 0.019*"gop" + 0.019*"love" + 0.019*"without"
2022-10-26 10:54:51,388 : INFO : topic #2 (0.333): 0.076*"trump" + 0.063*"knew" + 0.046*"called" + 0.046*"responsible" + 0.046*"mcconnell" + 0.045*"mccarthy" + 0.045*"mitch" + 0.045*"kevin" + 0.045*"backed" + 0.012*"pelosi"
2022-10-26 10:54:51,388 : INFO : topic diff=0.229974, rho=0.206300
2022-10-26 10:54:51,398 : INFO : PROGRESS: pass 5, at document #22000/34993
2022-10-26 10:54:51,667 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:54:51,670 : INFO : topic #0 (0.333): 0.083*"trump" + 0.021*"decided" + 0.016*"lost" + 0.015*"video" + 0.015*"roger" + 0.015*"stone" + 0.015*"help" + 0.014*"new" + 0.014*"6th" + 0.013*"coup"
2022-10-26 10:54:51,670 : INFO : topic #1 (0.333): 0.035*"democracy" + 0.023*"donald" + 0.023*"trump" + 0.023*"m

2022-10-26 10:54:54,089 : INFO : topic #2 (0.333): 0.068*"trump" + 0.057*"knew" + 0.042*"responsible" + 0.042*"mcconnell" + 0.042*"called" + 0.042*"mitch" + 0.042*"backed" + 0.042*"mccarthy" + 0.042*"kevin" + 0.013*"pelosi"
2022-10-26 10:54:54,090 : INFO : topic diff=0.251847, rho=0.202045
2022-10-26 10:54:54,173 : INFO : PROGRESS: pass 6, at document #4000/34993
2022-10-26 10:54:54,441 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:54:54,444 : INFO : topic #0 (0.333): 0.082*"trump" + 0.016*"decided" + 0.014*"6th" + 0.013*"video" + 0.013*"lost" + 0.013*"attack" + 0.012*"help" + 0.012*"stone" + 0.011*"roger" + 0.011*"new"
2022-10-26 10:54:54,445 : INFO : topic #1 (0.333): 0.034*"democracy" + 0.024*"knew" + 0.023*"trump" + 0.023*"donald" + 0.022*"maga" + 0.020*"subpoena" + 0.019*"country" + 0.019*"voted" + 0.019*"love" + 0.019*"without"
2022-10-26 10:54:54,446 : INFO : topic #2 (0.333): 0.065*"trump" + 0.058*"knew" + 0.042*"responsible" + 0.041

2022-10-26 10:54:57,288 : INFO : topic #2 (0.333): 0.076*"trump" + 0.063*"knew" + 0.046*"called" + 0.046*"responsible" + 0.046*"mcconnell" + 0.045*"mccarthy" + 0.045*"mitch" + 0.045*"kevin" + 0.045*"backed" + 0.012*"pelosi"
2022-10-26 10:54:57,289 : INFO : topic diff=0.224571, rho=0.202045
2022-10-26 10:54:57,299 : INFO : PROGRESS: pass 6, at document #22000/34993
2022-10-26 10:54:57,566 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:54:57,569 : INFO : topic #0 (0.333): 0.083*"trump" + 0.021*"decided" + 0.016*"lost" + 0.015*"video" + 0.015*"roger" + 0.015*"help" + 0.014*"stone" + 0.014*"new" + 0.014*"6th" + 0.013*"yet"
2022-10-26 10:54:57,570 : INFO : topic #1 (0.333): 0.035*"democracy" + 0.023*"donald" + 0.023*"maga" + 0.023*"trump" + 0.020*"country" + 0.020*"knew" + 0.019*"voted" + 0.019*"subpoena" + 0.019*"violence" + 0.018*"without"
2022-10-26 10:54:57,571 : INFO : topic #2 (0.333): 0.075*"trump" + 0.062*"knew" + 0.046*"called" + 0.046*"r

2022-10-26 10:55:00,009 : INFO : topic diff=0.246305, rho=0.198043
2022-10-26 10:55:00,019 : INFO : PROGRESS: pass 7, at document #4000/34993
2022-10-26 10:55:00,288 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:55:00,291 : INFO : topic #0 (0.333): 0.082*"trump" + 0.016*"decided" + 0.014*"6th" + 0.014*"video" + 0.013*"lost" + 0.013*"attack" + 0.012*"help" + 0.012*"stone" + 0.011*"roger" + 0.011*"new"
2022-10-26 10:55:00,292 : INFO : topic #1 (0.333): 0.034*"democracy" + 0.024*"knew" + 0.023*"trump" + 0.023*"donald" + 0.022*"maga" + 0.020*"subpoena" + 0.019*"country" + 0.019*"voted" + 0.018*"love" + 0.018*"without"
2022-10-26 10:55:00,293 : INFO : topic #2 (0.333): 0.066*"trump" + 0.058*"knew" + 0.041*"responsible" + 0.041*"called" + 0.041*"mcconnell" + 0.041*"mitch" + 0.041*"backed" + 0.041*"mccarthy" + 0.041*"kevin" + 0.016*"pelosi"
2022-10-26 10:55:00,294 : INFO : topic diff=0.222300, rho=0.198043
2022-10-26 10:55:00,304 : INFO : PROGRESS:

2022-10-26 10:55:03,360 : INFO : topic diff=0.219641, rho=0.198043
2022-10-26 10:55:03,371 : INFO : PROGRESS: pass 7, at document #22000/34993
2022-10-26 10:55:03,639 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:55:03,642 : INFO : topic #0 (0.333): 0.083*"trump" + 0.021*"decided" + 0.016*"lost" + 0.016*"video" + 0.015*"roger" + 0.014*"help" + 0.014*"stone" + 0.014*"new" + 0.014*"6th" + 0.013*"yet"
2022-10-26 10:55:03,643 : INFO : topic #1 (0.333): 0.035*"democracy" + 0.023*"maga" + 0.023*"donald" + 0.023*"trump" + 0.020*"country" + 0.020*"knew" + 0.019*"voted" + 0.019*"violence" + 0.019*"subpoena" + 0.018*"without"
2022-10-26 10:55:03,644 : INFO : topic #2 (0.333): 0.075*"trump" + 0.062*"knew" + 0.046*"called" + 0.046*"responsible" + 0.046*"mcconnell" + 0.045*"mccarthy" + 0.045*"mitch" + 0.045*"kevin" + 0.045*"backed" + 0.012*"pelosi"
2022-10-26 10:55:03,644 : INFO : topic diff=0.210542, rho=0.198043
2022-10-26 10:55:03,654 : INFO : PROGRES

2022-10-26 10:55:06,249 : INFO : PROGRESS: pass 8, at document #4000/34993
2022-10-26 10:55:06,547 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:55:06,551 : INFO : topic #0 (0.333): 0.082*"trump" + 0.016*"decided" + 0.014*"6th" + 0.014*"video" + 0.013*"lost" + 0.013*"attack" + 0.012*"help" + 0.012*"stone" + 0.011*"roger" + 0.011*"new"
2022-10-26 10:55:06,553 : INFO : topic #1 (0.333): 0.034*"democracy" + 0.024*"knew" + 0.023*"trump" + 0.023*"donald" + 0.022*"maga" + 0.020*"subpoena" + 0.019*"country" + 0.019*"voted" + 0.018*"love" + 0.018*"without"
2022-10-26 10:55:06,554 : INFO : topic #2 (0.333): 0.066*"trump" + 0.058*"knew" + 0.042*"responsible" + 0.041*"called" + 0.041*"mcconnell" + 0.041*"mitch" + 0.041*"backed" + 0.041*"mccarthy" + 0.041*"kevin" + 0.016*"pelosi"
2022-10-26 10:55:06,555 : INFO : topic diff=0.217868, rho=0.194270
2022-10-26 10:55:06,565 : INFO : PROGRESS: pass 8, at document #6000/34993
2022-10-26 10:55:06,868 : INFO : m

2022-10-26 10:55:09,536 : INFO : PROGRESS: pass 8, at document #22000/34993
2022-10-26 10:55:09,813 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:55:09,816 : INFO : topic #0 (0.333): 0.083*"trump" + 0.021*"decided" + 0.016*"lost" + 0.016*"video" + 0.014*"roger" + 0.014*"help" + 0.014*"stone" + 0.014*"new" + 0.014*"6th" + 0.013*"yet"
2022-10-26 10:55:09,817 : INFO : topic #1 (0.333): 0.035*"democracy" + 0.023*"maga" + 0.023*"donald" + 0.023*"trump" + 0.020*"country" + 0.020*"knew" + 0.019*"voted" + 0.019*"violence" + 0.019*"subpoena" + 0.018*"without"
2022-10-26 10:55:09,818 : INFO : topic #2 (0.333): 0.075*"trump" + 0.062*"knew" + 0.046*"called" + 0.046*"responsible" + 0.046*"mcconnell" + 0.045*"mccarthy" + 0.045*"mitch" + 0.045*"kevin" + 0.045*"backed" + 0.012*"pelosi"
2022-10-26 10:55:09,819 : INFO : topic diff=0.206086, rho=0.194270
2022-10-26 10:55:09,829 : INFO : PROGRESS: pass 8, at document #24000/34993
2022-10-26 10:55:10,124 : INFO 

2022-10-26 10:55:12,651 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:55:12,654 : INFO : topic #0 (0.333): 0.082*"trump" + 0.016*"decided" + 0.014*"6th" + 0.014*"video" + 0.013*"lost" + 0.013*"attack" + 0.012*"help" + 0.012*"stone" + 0.011*"roger" + 0.011*"new"
2022-10-26 10:55:12,656 : INFO : topic #1 (0.333): 0.034*"democracy" + 0.024*"knew" + 0.023*"trump" + 0.023*"donald" + 0.022*"maga" + 0.020*"subpoena" + 0.019*"country" + 0.019*"voted" + 0.018*"love" + 0.018*"without"
2022-10-26 10:55:12,656 : INFO : topic #2 (0.333): 0.066*"trump" + 0.058*"knew" + 0.042*"responsible" + 0.041*"called" + 0.041*"mcconnell" + 0.041*"mitch" + 0.041*"backed" + 0.041*"mccarthy" + 0.041*"kevin" + 0.016*"pelosi"
2022-10-26 10:55:12,657 : INFO : topic diff=0.213505, rho=0.190705
2022-10-26 10:55:12,667 : INFO : PROGRESS: pass 9, at document #6000/34993
2022-10-26 10:55:12,948 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-

2022-10-26 10:55:16,328 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-26 10:55:16,331 : INFO : topic #0 (0.333): 0.083*"trump" + 0.021*"decided" + 0.016*"video" + 0.016*"lost" + 0.014*"roger" + 0.014*"help" + 0.014*"stone" + 0.014*"new" + 0.013*"6th" + 0.013*"yet"
2022-10-26 10:55:16,332 : INFO : topic #1 (0.333): 0.035*"democracy" + 0.023*"maga" + 0.023*"donald" + 0.023*"trump" + 0.020*"country" + 0.020*"knew" + 0.019*"voted" + 0.019*"violence" + 0.019*"subpoena" + 0.018*"without"
2022-10-26 10:55:16,333 : INFO : topic #2 (0.333): 0.075*"trump" + 0.062*"knew" + 0.046*"called" + 0.046*"responsible" + 0.046*"mcconnell" + 0.045*"mccarthy" + 0.045*"mitch" + 0.045*"kevin" + 0.045*"backed" + 0.012*"pelosi"
2022-10-26 10:55:16,334 : INFO : topic diff=0.201611, rho=0.190705
2022-10-26 10:55:16,344 : INFO : PROGRESS: pass 9, at document #24000/34993
2022-10-26 10:55:16,625 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-1

<gensim.models.ldamodel.LdaModel at 0x7f9ca7230370>

In [14]:
# Inspect the 3-topic model `lda2`: returns a list of (topic_id, "weight*word + ...")
# tuples with the highest-weighted words per topic, and also logs each topic at INFO.
lda2.print_topics()

2022-10-26 10:55:18,497 : INFO : topic #0 (0.333): 0.090*"trump" + 0.016*"decided" + 0.015*"lost" + 0.015*"attack" + 0.014*"6th" + 0.014*"help" + 0.014*"video" + 0.013*"new" + 0.013*"stone" + 0.012*"roger"
2022-10-26 10:55:18,498 : INFO : topic #1 (0.333): 0.031*"democracy" + 0.028*"knew" + 0.027*"trump" + 0.026*"donald" + 0.023*"voted" + 0.022*"subpoena" + 0.018*"testify" + 0.017*"history" + 0.017*"unanimously" + 0.017*"oath"
2022-10-26 10:55:18,500 : INFO : topic #2 (0.333): 0.068*"trump" + 0.056*"knew" + 0.042*"responsible" + 0.042*"mcconnell" + 0.042*"called" + 0.042*"mitch" + 0.042*"backed" + 0.042*"mccarthy" + 0.042*"kevin" + 0.010*"pelosi"


[(0,
  '0.090*"trump" + 0.016*"decided" + 0.015*"lost" + 0.015*"attack" + 0.014*"6th" + 0.014*"help" + 0.014*"video" + 0.013*"new" + 0.013*"stone" + 0.012*"roger"'),
 (1,
  '0.031*"democracy" + 0.028*"knew" + 0.027*"trump" + 0.026*"donald" + 0.023*"voted" + 0.022*"subpoena" + 0.018*"testify" + 0.017*"history" + 0.017*"unanimously" + 0.017*"oath"'),
 (2,
  '0.068*"trump" + 0.056*"knew" + 0.042*"responsible" + 0.042*"mcconnell" + 0.042*"called" + 0.042*"mitch" + 0.042*"backed" + 0.042*"mccarthy" + 0.042*"kevin" + 0.010*"pelosi"')]