### LDA Probabilistic Modeling - LDA uses Gensim
- LDA topic modeling that uses gensim's `Phrases` model to create bigrams
- Uses gensim's `simple_preprocess` for tokenization

In [1]:
import pandas as pd
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize, RegexpTokenizer
from nltk.util import ngrams
nltk.download('stopwords')
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
import re

import gensim
from gensim import corpora, models, similarities, matutils
from gensim.models.phrases import Phrases, ENGLISH_CONNECTOR_WORDS
from gensim.utils import simple_preprocess
from gensim.models import CoherenceModel
import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

# spacy for lemmatization
import spacy

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/jennihawk/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
import pyLDAvis
import pyLDAvis.gensim_models  
import matplotlib.pyplot as plt
%matplotlib inline

  from imp import reload


In [3]:
# Location of the tweet export, kept in one named constant so the notebook can be
# pointed at another copy of the data without touching the loading code.
# NOTE(review): this is still an absolute, machine-specific path — consider a path
# relative to the project directory.
TWEETS_CSV = '/Users/jennihawk/Documents/Data Science/NLP_Unsupervised Learning/Project_NLP/TweetBatch3.csv'

# NOTE(review): the displayed frame has an 'Unnamed: 0' column, which looks like a
# saved index; pass index_col=0 to read_csv if it is not needed — TODO confirm.
tweets = pd.read_csv(TWEETS_CSV)
tweets

Unnamed: 0,text,cleaned
0,@ReallyAmerican1 #Roevember and\n#ForThePeople...,roevember and forthepeople and votebluein2022...
1,RT @sandibachom: IS THIS THING ON???!!This is ...,rt is this thing on this is pathetic acting se...
2,RT @sandibachom: IS THIS THING ON???!!This is ...,rt is this thing on this is pathetic acting se...
3,RT @tleehumphrey: Today is the beginning of th...,rt today is the beginning of the inquiry into ...
4,RT @AdamKinzinger: Mitch McConnell.\nKevin McC...,rt mitch mcconnell kevin mccarthy they both kn...
...,...,...
34988,RT @Adrian_Fontes: The January 6th committee j...,rt fontes the january 6th committee just concl...
34989,#January6thCommitteeHearings and everyone runn...,january6thcommitteehearings and everyone runn...
34990,RT @sandibachom: IS THIS THING ON???!!This is ...,rt is this thing on this is pathetic acting se...
34991,So they are gonna subpoena Trump I am guessing...,so they are gonna subpoena trump am guessing t...


### Stop Words 

In [4]:
# NLTK's English stopword list plus corpus-specific tokens to drop.
stop_words = stopwords.words('english') + [
    'rt', 'january', 'january6thcommitteehearings', 'ja', 'th', 'jan',
]

### List of initially cleaned tweets

In [5]:
# Collect the pre-cleaned tweet text as a plain Python list.
# Missing values in `cleaned` are replaced with an empty string first; otherwise the
# str() call made during tokenization would turn a NaN into the literal token 'nan'.
data = tweets['cleaned'].fillna('').tolist()

### Gensim tokenization via simple_preprocess( )
- https://www.machinelearningplus.com/nlp/topic-modeling-gensim-python/#9createbigramandtrigrammodels

In [6]:
def sent_to_words(sentences):
    """Lazily tokenize each document with gensim's ``simple_preprocess``.

    Every item of ``sentences`` is coerced to ``str`` and tokenized with
    ``deacc=True`` (accent marks are stripped from tokens); one list of tokens
    is yielded per input item, in input order.
    """
    tokenize = gensim.utils.simple_preprocess
    yield from (tokenize(str(text), deacc=True) for text in sentences)
# sent_to_words is a generator function, so materialize its output into a list;
# the token lists are used more than once further down.
data_words = list(sent_to_words(data))

#print(data_words)

### Build Bigram Model with Gensim Phrases( )

#### Phrases Model Arguments
- Code example from the gensim documentation (Radim Řehůřek): https://radimrehurek.com/gensim/models/phrases.html
- first arg takes a sequence of lists of tokens
- min_count = ignore all words and bigrams whose total collected count is lower than this value
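- threshold = score threshold for forming phrases: gensim joins two words into a bigram when the scoring function for the pair exceeds this value (a higher threshold means fewer phrases). For details on the scoring function, see
https://datascience.stackexchange.com/questions/25524/how-does-phrases-in-gensim-work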
- connector_words (set of str, optional) = set of words that may be included within a phrase without affecting its scoring. No phrase can start or end with a connector word; a phrase may contain any number of connector words in the middle.
- connector_words=ENGLISH_CONNECTOR_WORDS causes phrases to include common English articles, prepositions and conjunctions, such as bank_of_america or eye_of_the_beholder.

In [9]:
# Train the phrase (bigram) model on the tokenized tweets. The first argument must be
# a sequence of token lists (e.g. an iterable or a generator).
bigram_mod = Phrases(
    data_words,
    min_count=1,
    threshold=1,
    connector_words=ENGLISH_CONNECTOR_WORDS,
)

2022-10-27 16:56:53,275 : INFO : collecting all words and their counts
2022-10-27 16:56:53,285 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2022-10-27 16:56:53,489 : INFO : PROGRESS: at sentence #10000, processed 190666 words and 25460 word types
2022-10-27 16:56:53,687 : INFO : PROGRESS: at sentence #20000, processed 379448 words and 40019 word types
2022-10-27 16:56:53,891 : INFO : PROGRESS: at sentence #30000, processed 571741 words and 51604 word types
2022-10-27 16:56:53,990 : INFO : collected 55276 token types (unigram + bigrams) from a corpus of 666324 words and 34993 sentences
2022-10-27 16:56:53,991 : INFO : merged Phrases<55276 vocab, min_count=1, threshold=1, max_vocab_size=40000000>
2022-10-27 16:56:53,992 : INFO : Phrases lifecycle event {'msg': 'built Phrases<55276 vocab, min_count=1, threshold=1, max_vocab_size=40000000> in 0.72s', 'datetime': '2022-10-27T16:56:53.992297', 'gensim': '4.1.2', 'python': '3.9.7 (default, Sep 16 2021, 08:50:36) \n[Cl

#### EDA: See how the trained bigram model is working on unseen sentences

In [10]:
# Hand-tokenized example sentence used to probe the trained phrase model.
new_sentence = [
    'tfgs', 'big', 'lie', 'to', 'steal', 'the', 'election',
    'was', 'premeditated', 'months', 'before', 'the', 'election',
]

In [11]:
# Run the example sentence through the trained phrase model.
# Tokens that the model scores as a phrase come back joined into a single token
# with the `_` delimiter (e.g. 'big', 'lie' => 'big_lie').
bigram_mod[new_sentence]

['tfgs',
 'big_lie',
 'to',
 'steal_the_election',
 'was',
 'premeditated_months',
 'before_the_election']

### Functions to Remove Stopwords and Make Bigrams
- a lemmatization helper is also included, but it is currently commented out

In [12]:
def remove_stopwords(texts):
    """Drop stopwords from every document in ``texts``.

    Each document is converted with ``str()`` and tokenized by
    ``simple_preprocess``; tokens found in the module-level ``stop_words``
    are discarded.

    Args:
        texts: iterable of documents (strings or token lists).

    Returns:
        A list of token lists, one per input document, in input order.
    """
    # Build the lookup set once per call so each membership test is O(1)
    # instead of a linear scan of the stop_words list for every token.
    stop_set = set(stop_words)
    return [
        [word for word in simple_preprocess(str(doc)) if word not in stop_set]
        for doc in texts
    ]

def make_bigrams(texts):
    """Apply the trained phrase model ``bigram_mod`` to each document in ``texts``.

    Returns a list with one phrase-joined token sequence per input document.
    """
    phrased = []
    for tokens in texts:
        phrased.append(bigram_mod[tokens])
    return phrased

# def lemmatization(texts, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV']):
#     """https://spacy.io/api/annotation"""
#     texts_out = []
#     for sent in texts:
#         doc = nlp(" ".join(sent)) 
#         texts_out.append([token.lemma_ for token in doc if token.pos_ in allowed_postags])
#     return texts_out

### Call the functions in order

In [13]:
# Remove Stop Words
data_words_nostops = remove_stopwords(data_words)

# Form Bigrams
# NOTE(review): bigram_mod was trained on data_words (stopwords still present) but is
# applied here to the stopword-free tokens, so phrases that were only seen with a
# stopword between them may not be detected — confirm this ordering is intended.
data_words_bigrams = make_bigrams(data_words_nostops)

# Lemmatization is currently disabled; the spaCy setup and call are kept below for reference.
# Initialize spacy 'en' model, keeping only tagger component (for efficiency)
# python3 -m spacy download en
#nlp = spacy.load('en', disable=['parser', 'ner'])
#nlp = spacy.load("en_core_web_sm")

# Do lemmatization keeping only noun, adj, vb, adv
#data_lemmatized = lemmatization(data_words_bigrams, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV'])

In [14]:
# Show the first document after stopword removal and bigram formation.
print(data_words_bigrams[:1])

[['roevember', 'forthepeople', 'votebluein', 'oathbreakermaga']]


### Prepare the Inputs for Modeling
- There are two main inputs to the LDA topic model:
  - the dictionary (id2word), and
  - the corpus.

In [15]:
# Create Dictionary
# The corpora module implements the concept of a dictionary: a mapping between
# words and their integer ids.
id2word = corpora.Dictionary(data_words_bigrams)

2022-10-27 16:56:56,991 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2022-10-27 16:56:57,111 : INFO : adding document #10000 to Dictionary(5988 unique tokens: ['forthepeople', 'oathbreakermaga', 'roevember', 'votebluein', 'defense_chris']...)
2022-10-27 16:56:57,227 : INFO : adding document #20000 to Dictionary(8686 unique tokens: ['forthepeople', 'oathbreakermaga', 'roevember', 'votebluein', 'defense_chris']...)
2022-10-27 16:56:57,343 : INFO : adding document #30000 to Dictionary(10543 unique tokens: ['forthepeople', 'oathbreakermaga', 'roevember', 'votebluein', 'defense_chris']...)
2022-10-27 16:56:57,400 : INFO : built Dictionary(11136 unique tokens: ['forthepeople', 'oathbreakermaga', 'roevember', 'votebluein', 'defense_chris']...) from 34993 documents (total 245197 corpus positions)
2022-10-27 16:56:57,401 : INFO : Dictionary lifecycle event {'msg': "built Dictionary(11136 unique tokens: ['forthepeople', 'oathbreakermaga', 'roevember', 'votebluein', 'defense_chr

In [16]:
# Look up the token stored under id 1.
id2word[1]

'oathbreakermaga'

#### Create bag of words corpus
- corpus contains the word id and its frequency in every document https://www.tutorialspoint.com/gensim/gensim_creating_a_bag_of_words_corpus.htm
- This is used as input into LDA model

In [17]:
# Bag-of-words corpus: one doc2bow() result per tokenized document.
corpus2 = list(map(id2word.doc2bow, data_words_bigrams))

In [18]:
# View: Gensim creates a unique id for each word. Tuples are (word id, word frequency in the document).
# This is used as the input by the LDA model.
print(corpus2[:1])

[[(0, 1), (1, 1), (2, 1), (3, 1)]]


In [19]:
# To see which word a given id corresponds to, pass the id as a key to the dictionary.
id2word[0]

'forthepeople'

In [20]:
# Human-readable view of the first document: (token, frequency) pairs.
[
    [(id2word[token_id], count) for token_id, count in bow]
    for bow in corpus2[:1]
]

[[('forthepeople', 1),
  ('oathbreakermaga', 1),
  ('roevember', 1),
  ('votebluein', 1)]]

### Build the topic model: Training the LDA Model
- More on the parameters https://miningthedetails.com/blog/python/lda/GensimLDA/
- passes: the total number of training passes over the corpus.
- random_state: ({np.random.RandomState, int}, optional) – Either a randomState object or a seed to generate one. Useful for reproducibility.

In [21]:
# Train a 4-topic LDA model on the bag-of-words corpus; random_state is fixed so
# repeated runs are reproducible.
lda_model = gensim.models.LdaModel(
    corpus=corpus2,
    id2word=id2word,
    num_topics=4,
    passes=10,
    random_state=100,
)

2022-10-27 16:56:57,666 : INFO : using symmetric alpha at 0.25
2022-10-27 16:56:57,667 : INFO : using symmetric eta at 0.25
2022-10-27 16:56:57,671 : INFO : using serial LDA version on this node
2022-10-27 16:56:57,677 : INFO : running online (multi-pass) LDA training, 4 topics, 10 passes over the supplied corpus of 34993 documents, updating model once every 2000 documents, evaluating perplexity every 20000 documents, iterating 50x with a convergence threshold of 0.001000
2022-10-27 16:56:57,678 : INFO : PROGRESS: pass 0, at document #2000/34993
2022-10-27 16:56:58,633 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-27 16:56:58,637 : INFO : topic #0 (0.250): 0.041*"knew" + 0.028*"democracy" + 0.026*"decided" + 0.026*"loss" + 0.026*"rejection" + 0.026*"accept" + 0.026*"break" + 0.026*"american_people" + 0.026*"trump_tried" + 0.018*"trump"
2022-10-27 16:56:58,638 : INFO : topic #1 (0.250): 0.036*"violence" + 0.035*"maga_gop" + 0.035*"mark_hamill" + 0.

2022-10-27 16:57:00,420 : INFO : topic #1 (0.250): 0.044*"violence" + 0.041*"thcomm" + 0.041*"love" + 0.041*"maga_gop" + 0.040*"mark_hamill" + 0.040*"overthrowing_democracy" + 0.040*"country_without_creating" + 0.040*"correct" + 0.027*"trump" + 0.022*"says"
2022-10-27 16:57:00,421 : INFO : topic #2 (0.250): 0.038*"trump" + 0.026*"roger_stone" + 0.025*"trump_lost" + 0.024*"yet" + 0.024*"help" + 0.023*"mark_meadows" + 0.023*"decided" + 0.023*"hatch_coup" + 0.023*"rudy_giuliani" + 0.023*"new_video"
2022-10-27 16:57:00,423 : INFO : topic #3 (0.250): 0.085*"called" + 0.085*"kevin_mccarthy" + 0.085*"responsible" + 0.085*"mitch_mcconnell" + 0.085*"knew_trump" + 0.085*"backed" + 0.013*"thing" + 0.012*"person" + 0.012*"national" + 0.012*"sec"
2022-10-27 16:57:00,423 : INFO : topic diff=0.366616, rho=0.377964
2022-10-27 16:57:00,424 : INFO : PROGRESS: pass 0, at document #16000/34993
2022-10-27 16:57:00,675 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-27 1

2022-10-27 16:57:02,377 : INFO : topic diff=0.273028, rho=0.277350
2022-10-27 16:57:02,378 : INFO : PROGRESS: pass 0, at document #28000/34993
2022-10-27 16:57:02,635 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-27 16:57:02,639 : INFO : topic #0 (0.250): 0.059*"knew" + 0.035*"trump" + 0.021*"democracy" + 0.014*"american_people" + 0.014*"trump_tried" + 0.014*"accept" + 0.014*"break" + 0.014*"lost" + 0.014*"decided" + 0.014*"loss"
2022-10-27 16:57:02,640 : INFO : topic #1 (0.250): 0.039*"trump" + 0.029*"subpoena_donald" + 0.029*"testify" + 0.027*"voted_unanimously" + 0.027*"oath_democracy" + 0.027*"history_deserves" + 0.027*"demands" + 0.025*"violence" + 0.023*"thcomm" + 0.022*"love"
2022-10-27 16:57:02,641 : INFO : topic #2 (0.250): 0.033*"trump" + 0.032*"roger_stone" + 0.029*"trump_lost" + 0.029*"help" + 0.028*"yet" + 0.027*"mark_meadows" + 0.027*"decided" + 0.027*"hatch_coup" + 0.027*"rudy_giuliani" + 0.027*"new_video"
2022-10-27 16:57:02,643 : 

2022-10-27 16:57:04,318 : INFO : topic #3 (0.250): 0.080*"called" + 0.079*"responsible" + 0.079*"mitch_mcconnell" + 0.079*"knew_trump" + 0.079*"kevin_mccarthy" + 0.079*"backed" + 0.016*"thing" + 0.015*"person" + 0.015*"sec" + 0.015*"national"
2022-10-27 16:57:04,318 : INFO : topic diff=0.258355, rho=0.226476
2022-10-27 16:57:04,319 : INFO : PROGRESS: pass 1, at document #6000/34993
2022-10-27 16:57:04,567 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-27 16:57:04,571 : INFO : topic #0 (0.250): 0.051*"knew" + 0.027*"trump" + 0.021*"democracy" + 0.014*"american_people" + 0.014*"accept" + 0.014*"trump_tried" + 0.014*"loss" + 0.014*"break" + 0.014*"decided" + 0.014*"rejection"
2022-10-27 16:57:04,573 : INFO : topic #1 (0.250): 0.038*"violence" + 0.036*"thcomm" + 0.035*"love" + 0.035*"maga_gop" + 0.034*"correct" + 0.034*"mark_hamill" + 0.034*"overthrowing_democracy" + 0.034*"country_without_creating" + 0.030*"trump" + 0.021*"testify"
2022-10-27 16:57:04

2022-10-27 16:57:06,001 : INFO : topic #2 (0.250): 0.036*"trump" + 0.028*"roger_stone" + 0.026*"trump_lost" + 0.025*"help" + 0.024*"yet" + 0.024*"mark_meadows" + 0.023*"decided" + 0.023*"hatch_coup" + 0.023*"rudy_giuliani" + 0.023*"new_video"
2022-10-27 16:57:06,002 : INFO : topic #3 (0.250): 0.093*"called" + 0.091*"responsible" + 0.091*"mitch_mcconnell" + 0.091*"kevin_mccarthy" + 0.091*"knew_trump" + 0.091*"backed" + 0.013*"thing" + 0.013*"person" + 0.013*"national" + 0.013*"sec"
2022-10-27 16:57:06,002 : INFO : topic diff=0.226809, rho=0.226476
2022-10-27 16:57:06,378 : INFO : -6.457 per-word bound, 87.9 perplexity estimate based on a held-out corpus of 2000 documents with 14367 words
2022-10-27 16:57:06,378 : INFO : PROGRESS: pass 1, at document #20000/34993
2022-10-27 16:57:06,630 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-27 16:57:06,634 : INFO : topic #0 (0.250): 0.049*"knew" + 0.037*"trump" + 0.021*"democracy" + 0.015*"american_people" +

2022-10-27 16:57:08,103 : INFO : topic #0 (0.250): 0.053*"knew" + 0.035*"trump" + 0.016*"democracy" + 0.013*"lost" + 0.011*"american_people" + 0.011*"accept" + 0.011*"break" + 0.011*"trump_tried" + 0.011*"loss" + 0.010*"decided"
2022-10-27 16:57:08,104 : INFO : topic #1 (0.250): 0.044*"trump" + 0.033*"testify" + 0.033*"subpoena_donald" + 0.031*"voted_unanimously" + 0.031*"demands" + 0.031*"oath_democracy" + 0.031*"history_deserves" + 0.027*"violence" + 0.025*"thcomm" + 0.025*"love"
2022-10-27 16:57:08,104 : INFO : topic #2 (0.250): 0.035*"trump" + 0.030*"roger_stone" + 0.028*"trump_lost" + 0.027*"help" + 0.026*"yet" + 0.026*"mark_meadows" + 0.025*"decided" + 0.025*"hatch_coup" + 0.025*"rudy_giuliani" + 0.025*"new_video"
2022-10-27 16:57:08,106 : INFO : topic #3 (0.250): 0.075*"called" + 0.074*"responsible" + 0.074*"knew_trump" + 0.074*"mitch_mcconnell" + 0.074*"kevin_mccarthy" + 0.074*"backed" + 0.020*"thing" + 0.019*"person" + 0.018*"sec" + 0.018*"national"
2022-10-27 16:57:08,106 : I

2022-10-27 16:57:09,711 : INFO : topic diff=0.213678, rho=0.220882
2022-10-27 16:57:09,712 : INFO : PROGRESS: pass 2, at document #10000/34993
2022-10-27 16:57:09,960 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-27 16:57:09,965 : INFO : topic #0 (0.250): 0.045*"knew" + 0.028*"trump" + 0.022*"democracy" + 0.015*"american_people" + 0.015*"trump_tried" + 0.015*"accept" + 0.015*"break" + 0.015*"loss" + 0.015*"decided" + 0.015*"rejection"
2022-10-27 16:57:09,966 : INFO : topic #1 (0.250): 0.043*"violence" + 0.041*"thcomm" + 0.041*"love" + 0.040*"maga_gop" + 0.040*"correct" + 0.040*"overthrowing_democracy" + 0.040*"country_without_creating" + 0.040*"mark_hamill" + 0.026*"trump" + 0.025*"want"
2022-10-27 16:57:09,968 : INFO : topic #2 (0.250): 0.036*"trump" + 0.024*"roger_stone" + 0.022*"trump_lost" + 0.022*"help" + 0.021*"violence" + 0.021*"yet" + 0.020*"mark_meadows" + 0.020*"new_video" + 0.020*"decided" + 0.020*"hatch_coup"
2022-10-27 16:57:09,970 : 

2022-10-27 16:57:11,846 : INFO : topic #2 (0.250): 0.033*"trump" + 0.032*"roger_stone" + 0.029*"trump_lost" + 0.028*"help" + 0.028*"yet" + 0.027*"mark_meadows" + 0.027*"decided" + 0.027*"rudy_giuliani" + 0.027*"hatch_coup" + 0.027*"new_video"
2022-10-27 16:57:11,848 : INFO : topic #3 (0.250): 0.091*"called" + 0.090*"responsible" + 0.090*"mitch_mcconnell" + 0.090*"knew_trump" + 0.090*"kevin_mccarthy" + 0.089*"backed" + 0.015*"thing" + 0.015*"person" + 0.015*"miller" + 0.015*"deploy"
2022-10-27 16:57:11,848 : INFO : topic diff=0.192591, rho=0.220882
2022-10-27 16:57:11,849 : INFO : PROGRESS: pass 2, at document #24000/34993
2022-10-27 16:57:12,078 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-27 16:57:12,082 : INFO : topic #0 (0.250): 0.050*"knew" + 0.036*"trump" + 0.020*"democracy" + 0.015*"american_people" + 0.015*"trump_tried" + 0.015*"accept" + 0.015*"break" + 0.014*"loss" + 0.014*"rejection" + 0.014*"decided"
2022-10-27 16:57:12,083 : INFO : to

2022-10-27 16:57:13,540 : INFO : topic #0 (0.250): 0.049*"knew" + 0.029*"trump" + 0.015*"democracy" + 0.012*"lost" + 0.010*"american_people" + 0.010*"break" + 0.010*"trump_tried" + 0.010*"accept" + 0.010*"loss" + 0.010*"rejection"
2022-10-27 16:57:13,541 : INFO : topic #1 (0.250): 0.052*"trump" + 0.039*"subpoena_donald" + 0.039*"testify" + 0.037*"voted_unanimously" + 0.037*"demands" + 0.037*"history_deserves" + 0.037*"oath_democracy" + 0.024*"violence" + 0.023*"thcomm" + 0.022*"love"
2022-10-27 16:57:13,542 : INFO : topic #2 (0.250): 0.038*"trump" + 0.027*"roger_stone" + 0.026*"trump_lost" + 0.024*"help" + 0.023*"yet" + 0.023*"mark_meadows" + 0.023*"decided" + 0.023*"rudy_giuliani" + 0.023*"hatch_coup" + 0.022*"new_video"
2022-10-27 16:57:13,544 : INFO : topic #3 (0.250): 0.073*"called" + 0.073*"mitch_mcconnell" + 0.073*"knew_trump" + 0.073*"responsible" + 0.073*"kevin_mccarthy" + 0.073*"backed" + 0.020*"thing" + 0.018*"sec" + 0.018*"person" + 0.018*"national"
2022-10-27 16:57:13,545 :

2022-10-27 16:57:15,005 : INFO : topic diff=0.177844, rho=0.215683
2022-10-27 16:57:15,006 : INFO : PROGRESS: pass 3, at document #14000/34993
2022-10-27 16:57:15,229 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-27 16:57:15,233 : INFO : topic #0 (0.250): 0.044*"knew" + 0.033*"trump" + 0.020*"democracy" + 0.014*"accept" + 0.013*"american_people" + 0.013*"break" + 0.013*"trump_tried" + 0.013*"loss" + 0.013*"rejection" + 0.013*"decided"
2022-10-27 16:57:15,234 : INFO : topic #1 (0.250): 0.043*"violence" + 0.041*"thcomm" + 0.040*"love" + 0.040*"maga_gop" + 0.039*"correct" + 0.039*"mark_hamill" + 0.039*"country_without_creating" + 0.039*"overthrowing_democracy" + 0.029*"trump" + 0.022*"says"
2022-10-27 16:57:15,235 : INFO : topic #2 (0.250): 0.035*"trump" + 0.027*"roger_stone" + 0.024*"trump_lost" + 0.023*"help" + 0.023*"yet" + 0.023*"mark_meadows" + 0.022*"decided" + 0.022*"hatch_coup" + 0.022*"rudy_giuliani" + 0.022*"new_video"
2022-10-27 16:57:15,2

2022-10-27 16:57:17,081 : INFO : topic #2 (0.250): 0.033*"trump" + 0.033*"roger_stone" + 0.030*"trump_lost" + 0.030*"help" + 0.029*"yet" + 0.028*"mark_meadows" + 0.028*"decided" + 0.028*"rudy_giuliani" + 0.028*"hatch_coup" + 0.028*"new_video"
2022-10-27 16:57:17,082 : INFO : topic #3 (0.250): 0.084*"called" + 0.083*"knew_trump" + 0.083*"responsible" + 0.083*"mitch_mcconnell" + 0.083*"kevin_mccarthy" + 0.083*"backed" + 0.017*"thing" + 0.017*"person" + 0.017*"miller" + 0.017*"national"
2022-10-27 16:57:17,084 : INFO : topic diff=0.168474, rho=0.215683
2022-10-27 16:57:17,085 : INFO : PROGRESS: pass 3, at document #28000/34993
2022-10-27 16:57:17,338 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-27 16:57:17,342 : INFO : topic #0 (0.250): 0.051*"knew" + 0.035*"trump" + 0.019*"democracy" + 0.013*"american_people" + 0.013*"trump_tried" + 0.012*"lost" + 0.012*"accept" + 0.012*"break" + 0.012*"decided" + 0.012*"loss"
2022-10-27 16:57:17,343 : INFO : topic

2022-10-27 16:57:18,886 : INFO : topic #1 (0.250): 0.035*"trump" + 0.034*"violence" + 0.033*"thcomm" + 0.032*"love" + 0.032*"maga_gop" + 0.031*"correct" + 0.031*"overthrowing_democracy" + 0.031*"mark_hamill" + 0.031*"country_without_creating" + 0.026*"testify"
2022-10-27 16:57:18,887 : INFO : topic #2 (0.250): 0.033*"trump" + 0.025*"roger_stone" + 0.023*"trump_lost" + 0.022*"help" + 0.021*"yet" + 0.021*"mark_meadows" + 0.021*"violence" + 0.020*"decided" + 0.020*"hatch_coup" + 0.020*"rudy_giuliani"
2022-10-27 16:57:18,888 : INFO : topic #3 (0.250): 0.083*"called" + 0.083*"responsible" + 0.083*"mitch_mcconnell" + 0.083*"knew_trump" + 0.083*"kevin_mccarthy" + 0.083*"backed" + 0.017*"thing" + 0.016*"person" + 0.016*"sec" + 0.015*"national"
2022-10-27 16:57:18,888 : INFO : topic diff=0.196301, rho=0.210835
2022-10-27 16:57:18,889 : INFO : PROGRESS: pass 4, at document #6000/34993
2022-10-27 16:57:19,113 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-27 

2022-10-27 16:57:20,467 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-27 16:57:20,471 : INFO : topic #0 (0.250): 0.046*"knew" + 0.035*"trump" + 0.019*"democracy" + 0.013*"american_people" + 0.013*"accept" + 0.013*"lost" + 0.013*"trump_tried" + 0.013*"loss" + 0.013*"break" + 0.012*"rejection"
2022-10-27 16:57:20,472 : INFO : topic #1 (0.250): 0.044*"violence" + 0.041*"thcomm" + 0.041*"love" + 0.040*"maga_gop" + 0.039*"correct" + 0.039*"overthrowing_democracy" + 0.039*"mark_hamill" + 0.039*"country_without_creating" + 0.033*"trump" + 0.025*"testify"
2022-10-27 16:57:20,473 : INFO : topic #2 (0.250): 0.035*"trump" + 0.029*"roger_stone" + 0.027*"trump_lost" + 0.025*"help" + 0.025*"yet" + 0.025*"mark_meadows" + 0.024*"decided" + 0.024*"rudy_giuliani" + 0.024*"hatch_coup" + 0.024*"new_video"
2022-10-27 16:57:20,474 : INFO : topic #3 (0.250): 0.095*"called" + 0.094*"responsible" + 0.094*"mitch_mcconnell" + 0.094*"kevin_mccarthy" + 0.094*"knew_trump" + 0.

2022-10-27 16:57:22,188 : INFO : topic #3 (0.250): 0.078*"called" + 0.078*"knew_trump" + 0.078*"responsible" + 0.077*"mitch_mcconnell" + 0.077*"kevin_mccarthy" + 0.077*"backed" + 0.020*"thing" + 0.018*"person" + 0.018*"miller" + 0.018*"sec"
2022-10-27 16:57:22,188 : INFO : topic diff=0.168356, rho=0.210835
2022-10-27 16:57:22,189 : INFO : PROGRESS: pass 4, at document #32000/34993
2022-10-27 16:57:22,403 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-27 16:57:22,407 : INFO : topic #0 (0.250): 0.050*"knew" + 0.034*"trump" + 0.016*"democracy" + 0.013*"lost" + 0.011*"american_people" + 0.010*"accept" + 0.010*"break" + 0.010*"trump_tried" + 0.010*"loss" + 0.010*"decided"
2022-10-27 16:57:22,409 : INFO : topic #1 (0.250): 0.043*"trump" + 0.033*"testify" + 0.033*"subpoena_donald" + 0.031*"voted_unanimously" + 0.031*"demands" + 0.031*"history_deserves" + 0.031*"oath_democracy" + 0.028*"violence" + 0.026*"thcomm" + 0.026*"love"
2022-10-27 16:57:22,410 : IN

2022-10-27 16:57:24,214 : INFO : topic #2 (0.250): 0.034*"trump" + 0.024*"roger_stone" + 0.022*"trump_lost" + 0.021*"help" + 0.021*"violence" + 0.020*"yet" + 0.020*"mark_meadows" + 0.019*"new_video" + 0.019*"decided" + 0.019*"hatch_coup"
2022-10-27 16:57:24,215 : INFO : topic #3 (0.250): 0.090*"called" + 0.090*"responsible" + 0.089*"kevin_mccarthy" + 0.089*"mitch_mcconnell" + 0.089*"knew_trump" + 0.089*"backed" + 0.017*"thing" + 0.016*"person" + 0.016*"national" + 0.016*"miller"
2022-10-27 16:57:24,216 : INFO : topic diff=0.181657, rho=0.206300
2022-10-27 16:57:24,217 : INFO : PROGRESS: pass 5, at document #10000/34993
2022-10-27 16:57:24,593 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-27 16:57:24,602 : INFO : topic #0 (0.250): 0.041*"knew" + 0.029*"trump" + 0.020*"democracy" + 0.014*"american_people" + 0.014*"trump_tried" + 0.014*"accept" + 0.014*"break" + 0.014*"loss" + 0.013*"decided" + 0.013*"rejection"
2022-10-27 16:57:24,604 : INFO : topic

2022-10-27 16:57:26,642 : INFO : topic #0 (0.250): 0.043*"knew" + 0.035*"trump" + 0.020*"democracy" + 0.015*"american_people" + 0.014*"accept" + 0.014*"trump_tried" + 0.014*"loss" + 0.014*"break" + 0.014*"rejection" + 0.014*"decided"
2022-10-27 16:57:26,643 : INFO : topic #1 (0.250): 0.039*"trump" + 0.039*"violence" + 0.036*"thcomm" + 0.035*"love" + 0.034*"maga_gop" + 0.034*"correct" + 0.034*"mark_hamill" + 0.034*"overthrowing_democracy" + 0.034*"country_without_creating" + 0.028*"subpoena_donald"
2022-10-27 16:57:26,644 : INFO : topic #2 (0.250): 0.034*"trump" + 0.033*"roger_stone" + 0.030*"trump_lost" + 0.029*"help" + 0.028*"yet" + 0.028*"mark_meadows" + 0.028*"decided" + 0.028*"hatch_coup" + 0.028*"rudy_giuliani" + 0.027*"new_video"
2022-10-27 16:57:26,645 : INFO : topic #3 (0.250): 0.092*"called" + 0.091*"responsible" + 0.091*"mitch_mcconnell" + 0.091*"knew_trump" + 0.091*"kevin_mccarthy" + 0.091*"backed" + 0.015*"thing" + 0.015*"person" + 0.015*"miller" + 0.015*"deploy"
2022-10-27

2022-10-27 16:57:28,037 : INFO : topic diff=0.131025, rho=0.206300
2022-10-27 16:57:28,268 : INFO : -6.161 per-word bound, 71.5 perplexity estimate based on a held-out corpus of 993 documents with 6741 words
2022-10-27 16:57:28,269 : INFO : PROGRESS: pass 5, at document #34993/34993
2022-10-27 16:57:28,383 : INFO : merging changes from 993 documents into a model of 34993 documents
2022-10-27 16:57:28,387 : INFO : topic #0 (0.250): 0.047*"knew" + 0.030*"trump" + 0.015*"democracy" + 0.011*"lost" + 0.010*"american_people" + 0.010*"break" + 0.010*"trump_tried" + 0.010*"accept" + 0.009*"loss" + 0.009*"decided"
2022-10-27 16:57:28,388 : INFO : topic #1 (0.250): 0.051*"trump" + 0.039*"subpoena_donald" + 0.039*"testify" + 0.037*"voted_unanimously" + 0.037*"demands" + 0.037*"oath_democracy" + 0.037*"history_deserves" + 0.026*"violence" + 0.024*"thcomm" + 0.023*"love"
2022-10-27 16:57:28,389 : INFO : topic #2 (0.250): 0.038*"trump" + 0.028*"roger_stone" + 0.027*"trump_lost" + 0.025*"help" + 0.02

2022-10-27 16:57:29,824 : INFO : topic #2 (0.250): 0.034*"trump" + 0.026*"roger_stone" + 0.024*"trump_lost" + 0.023*"help" + 0.022*"yet" + 0.022*"mark_meadows" + 0.021*"decided" + 0.021*"new_video" + 0.021*"hatch_coup" + 0.021*"rudy_giuliani"
2022-10-27 16:57:29,826 : INFO : topic #3 (0.250): 0.092*"called" + 0.091*"responsible" + 0.091*"kevin_mccarthy" + 0.091*"mitch_mcconnell" + 0.091*"knew_trump" + 0.091*"backed" + 0.015*"thing" + 0.015*"person" + 0.014*"national" + 0.014*"miller"
2022-10-27 16:57:29,827 : INFO : topic diff=0.156129, rho=0.202045
2022-10-27 16:57:29,827 : INFO : PROGRESS: pass 6, at document #14000/34993
2022-10-27 16:57:30,054 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-27 16:57:30,058 : INFO : topic #0 (0.250): 0.042*"knew" + 0.033*"trump" + 0.019*"democracy" + 0.013*"accept" + 0.013*"american_people" + 0.012*"break" + 0.012*"trump_tried" + 0.012*"loss" + 0.012*"rejection" + 0.012*"decided"
2022-10-27 16:57:30,059 : INFO : 

2022-10-27 16:57:31,811 : INFO : topic #0 (0.250): 0.048*"knew" + 0.035*"trump" + 0.019*"democracy" + 0.013*"american_people" + 0.013*"trump_tried" + 0.013*"accept" + 0.013*"break" + 0.013*"loss" + 0.013*"decided" + 0.013*"rejection"
2022-10-27 16:57:31,812 : INFO : topic #1 (0.250): 0.037*"trump" + 0.032*"violence" + 0.029*"thcomm" + 0.029*"subpoena_donald" + 0.029*"love" + 0.029*"testify" + 0.028*"maga_gop" + 0.027*"correct" + 0.027*"country_without_creating" + 0.027*"mark_hamill"
2022-10-27 16:57:31,813 : INFO : topic #2 (0.250): 0.034*"trump" + 0.033*"roger_stone" + 0.031*"trump_lost" + 0.030*"help" + 0.029*"yet" + 0.029*"mark_meadows" + 0.029*"decided" + 0.029*"rudy_giuliani" + 0.029*"hatch_coup" + 0.028*"new_video"
2022-10-27 16:57:31,814 : INFO : topic #3 (0.250): 0.085*"called" + 0.084*"knew_trump" + 0.084*"responsible" + 0.084*"mitch_mcconnell" + 0.084*"kevin_mccarthy" + 0.084*"backed" + 0.017*"thing" + 0.017*"person" + 0.017*"national" + 0.017*"miller"
2022-10-27 16:57:31,815

2022-10-27 16:57:33,280 : INFO : topic diff=0.205717, rho=0.198043
2022-10-27 16:57:33,281 : INFO : PROGRESS: pass 7, at document #4000/34993
2022-10-27 16:57:33,515 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-27 16:57:33,519 : INFO : topic #0 (0.250): 0.048*"knew" + 0.028*"trump" + 0.016*"democracy" + 0.012*"american_people" + 0.011*"trump_tried" + 0.011*"loss" + 0.011*"accept" + 0.011*"break" + 0.011*"decided" + 0.011*"rejection"
2022-10-27 16:57:33,520 : INFO : topic #1 (0.250): 0.035*"trump" + 0.034*"violence" + 0.033*"thcomm" + 0.032*"love" + 0.032*"maga_gop" + 0.031*"correct" + 0.031*"overthrowing_democracy" + 0.031*"mark_hamill" + 0.031*"country_without_creating" + 0.027*"testify"
2022-10-27 16:57:33,521 : INFO : topic #2 (0.250): 0.034*"trump" + 0.026*"roger_stone" + 0.024*"trump_lost" + 0.023*"help" + 0.022*"yet" + 0.022*"mark_meadows" + 0.021*"decided" + 0.021*"violence" + 0.021*"hatch_coup" + 0.021*"rudy_giuliani"
2022-10-27 16:57:33,

2022-10-27 16:57:34,921 : INFO : topic #3 (0.250): 0.096*"called" + 0.095*"responsible" + 0.095*"kevin_mccarthy" + 0.095*"knew_trump" + 0.095*"mitch_mcconnell" + 0.095*"backed" + 0.014*"thing" + 0.014*"person" + 0.014*"national" + 0.014*"sec"
2022-10-27 16:57:34,921 : INFO : topic diff=0.147613, rho=0.198043
2022-10-27 16:57:34,922 : INFO : PROGRESS: pass 7, at document #18000/34993
2022-10-27 16:57:35,155 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-27 16:57:35,160 : INFO : topic #0 (0.250): 0.045*"knew" + 0.035*"trump" + 0.018*"democracy" + 0.013*"american_people" + 0.013*"accept" + 0.012*"trump_tried" + 0.012*"lost" + 0.012*"loss" + 0.012*"break" + 0.012*"rejection"
2022-10-27 16:57:35,161 : INFO : topic #1 (0.250): 0.044*"violence" + 0.041*"thcomm" + 0.041*"love" + 0.040*"maga_gop" + 0.039*"correct" + 0.039*"mark_hamill" + 0.039*"overthrowing_democracy" + 0.039*"country_without_creating" + 0.033*"trump" + 0.025*"testify"
2022-10-27 16:57:35,1

2022-10-27 16:57:36,923 : INFO : topic #1 (0.250): 0.040*"trump" + 0.031*"testify" + 0.031*"subpoena_donald" + 0.029*"violence" + 0.029*"voted_unanimously" + 0.029*"demands" + 0.029*"oath_democracy" + 0.029*"history_deserves" + 0.027*"thcomm" + 0.026*"love"
2022-10-27 16:57:36,924 : INFO : topic #2 (0.250): 0.034*"trump" + 0.033*"roger_stone" + 0.030*"trump_lost" + 0.029*"help" + 0.028*"yet" + 0.028*"mark_meadows" + 0.028*"decided" + 0.028*"rudy_giuliani" + 0.028*"hatch_coup" + 0.028*"new_video"
2022-10-27 16:57:36,925 : INFO : topic #3 (0.250): 0.079*"called" + 0.078*"knew_trump" + 0.078*"responsible" + 0.078*"mitch_mcconnell" + 0.078*"kevin_mccarthy" + 0.078*"backed" + 0.020*"thing" + 0.018*"person" + 0.018*"miller" + 0.018*"sec"
2022-10-27 16:57:36,926 : INFO : topic diff=0.149843, rho=0.198043
2022-10-27 16:57:36,927 : INFO : PROGRESS: pass 7, at document #32000/34993
2022-10-27 16:57:37,143 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-27 16:

2022-10-27 16:57:38,394 : INFO : topic diff=0.198671, rho=0.194270
2022-10-27 16:57:38,395 : INFO : PROGRESS: pass 8, at document #8000/34993
2022-10-27 16:57:38,613 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-27 16:57:38,616 : INFO : topic #0 (0.250): 0.041*"knew" + 0.028*"trump" + 0.018*"democracy" + 0.012*"american_people" + 0.012*"accept" + 0.012*"trump_tried" + 0.012*"break" + 0.012*"loss" + 0.012*"decided" + 0.012*"rejection"
2022-10-27 16:57:38,618 : INFO : topic #1 (0.250): 0.042*"violence" + 0.040*"thcomm" + 0.040*"love" + 0.039*"maga_gop" + 0.039*"correct" + 0.039*"mark_hamill" + 0.039*"country_without_creating" + 0.039*"overthrowing_democracy" + 0.028*"trump" + 0.021*"want"
2022-10-27 16:57:38,619 : INFO : topic #2 (0.250): 0.034*"trump" + 0.024*"roger_stone" + 0.023*"trump_lost" + 0.022*"help" + 0.021*"violence" + 0.021*"yet" + 0.020*"mark_meadows" + 0.020*"decided" + 0.020*"new_video" + 0.020*"rudy_giuliani"
2022-10-27 16:57:38,619 

2022-10-27 16:57:40,345 : INFO : topic #2 (0.250): 0.035*"trump" + 0.030*"roger_stone" + 0.028*"trump_lost" + 0.026*"help" + 0.026*"yet" + 0.026*"mark_meadows" + 0.025*"decided" + 0.025*"rudy_giuliani" + 0.025*"hatch_coup" + 0.025*"new_video"
2022-10-27 16:57:40,346 : INFO : topic #3 (0.250): 0.094*"called" + 0.092*"responsible" + 0.092*"mitch_mcconnell" + 0.092*"kevin_mccarthy" + 0.092*"knew_trump" + 0.092*"backed" + 0.015*"thing" + 0.014*"person" + 0.014*"sec" + 0.014*"national"
2022-10-27 16:57:40,347 : INFO : topic diff=0.154077, rho=0.194270
2022-10-27 16:57:40,347 : INFO : PROGRESS: pass 8, at document #22000/34993
2022-10-27 16:57:40,570 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-27 16:57:40,574 : INFO : topic #0 (0.250): 0.042*"knew" + 0.035*"trump" + 0.019*"democracy" + 0.014*"american_people" + 0.014*"accept" + 0.014*"trump_tried" + 0.014*"loss" + 0.014*"break" + 0.014*"decided" + 0.014*"rejection"
2022-10-27 16:57:40,575 : INFO : top

2022-10-27 16:57:41,918 : INFO : topic #1 (0.250): 0.047*"trump" + 0.036*"subpoena_donald" + 0.035*"testify" + 0.034*"voted_unanimously" + 0.033*"demands" + 0.033*"history_deserves" + 0.033*"oath_democracy" + 0.027*"violence" + 0.025*"thcomm" + 0.024*"love"
2022-10-27 16:57:41,918 : INFO : topic #2 (0.250): 0.037*"trump" + 0.030*"roger_stone" + 0.028*"trump_lost" + 0.027*"help" + 0.026*"yet" + 0.026*"mark_meadows" + 0.026*"decided" + 0.025*"hatch_coup" + 0.025*"rudy_giuliani" + 0.025*"new_video"
2022-10-27 16:57:41,919 : INFO : topic #3 (0.250): 0.076*"called" + 0.076*"mitch_mcconnell" + 0.076*"responsible" + 0.076*"knew_trump" + 0.076*"kevin_mccarthy" + 0.076*"backed" + 0.020*"thing" + 0.019*"person" + 0.019*"national" + 0.019*"sec"
2022-10-27 16:57:41,920 : INFO : topic diff=0.119122, rho=0.194270
2022-10-27 16:57:42,105 : INFO : -6.147 per-word bound, 70.9 perplexity estimate based on a held-out corpus of 993 documents with 6741 words
2022-10-27 16:57:42,105 : INFO : PROGRESS: pass 

2022-10-27 16:57:43,383 : INFO : topic diff=0.120614, rho=0.190705
2022-10-27 16:57:43,384 : INFO : PROGRESS: pass 9, at document #12000/34993
2022-10-27 16:57:43,629 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-27 16:57:43,633 : INFO : topic #0 (0.250): 0.038*"knew" + 0.029*"trump" + 0.020*"democracy" + 0.013*"american_people" + 0.013*"accept" + 0.013*"trump_tried" + 0.013*"break" + 0.013*"loss" + 0.013*"rejection" + 0.013*"decided"
2022-10-27 16:57:43,635 : INFO : topic #1 (0.250): 0.045*"violence" + 0.042*"thcomm" + 0.042*"love" + 0.041*"maga_gop" + 0.041*"correct" + 0.041*"mark_hamill" + 0.041*"overthrowing_democracy" + 0.041*"country_without_creating" + 0.027*"trump" + 0.023*"want"
2022-10-27 16:57:43,636 : INFO : topic #2 (0.250): 0.034*"trump" + 0.026*"roger_stone" + 0.024*"trump_lost" + 0.023*"help" + 0.023*"yet" + 0.022*"mark_meadows" + 0.022*"decided" + 0.022*"new_video" + 0.022*"hatch_coup" + 0.022*"rudy_giuliani"
2022-10-27 16:57:43,6

2022-10-27 16:57:45,470 : INFO : topic #2 (0.250): 0.034*"trump" + 0.033*"roger_stone" + 0.031*"trump_lost" + 0.030*"help" + 0.029*"yet" + 0.029*"mark_meadows" + 0.028*"decided" + 0.028*"rudy_giuliani" + 0.028*"hatch_coup" + 0.028*"new_video"
2022-10-27 16:57:45,471 : INFO : topic #3 (0.250): 0.089*"called" + 0.089*"responsible" + 0.088*"knew_trump" + 0.088*"mitch_mcconnell" + 0.088*"kevin_mccarthy" + 0.088*"backed" + 0.016*"thing" + 0.016*"person" + 0.015*"national" + 0.015*"miller"
2022-10-27 16:57:45,472 : INFO : topic diff=0.131028, rho=0.190705
2022-10-27 16:57:45,472 : INFO : PROGRESS: pass 9, at document #26000/34993
2022-10-27 16:57:45,688 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-27 16:57:45,692 : INFO : topic #0 (0.250): 0.047*"knew" + 0.035*"trump" + 0.018*"democracy" + 0.013*"american_people" + 0.013*"trump_tried" + 0.013*"accept" + 0.013*"break" + 0.013*"loss" + 0.013*"decided" + 0.013*"rejection"
2022-10-27 16:57:45,693 : INFO : 

In [22]:
# List the ten highest-weighted keywords per topic (for up to 20 topics) with print_topics().
# A keyword's weight indicates how important that keyword is to its topic.
# The arguments are written out explicitly; they match gensim's defaults.
lda_model.print_topics(num_topics=20, num_words=10)

2022-10-27 16:57:46,921 : INFO : topic #0 (0.250): 0.046*"knew" + 0.030*"trump" + 0.015*"democracy" + 0.011*"lost" + 0.010*"american_people" + 0.009*"break" + 0.009*"trump_tried" + 0.009*"accept" + 0.009*"loss" + 0.009*"decided"
2022-10-27 16:57:46,923 : INFO : topic #1 (0.250): 0.050*"trump" + 0.039*"subpoena_donald" + 0.039*"testify" + 0.037*"voted_unanimously" + 0.037*"demands" + 0.037*"oath_democracy" + 0.037*"history_deserves" + 0.026*"violence" + 0.025*"thcomm" + 0.024*"love"
2022-10-27 16:57:46,923 : INFO : topic #2 (0.250): 0.038*"trump" + 0.029*"roger_stone" + 0.028*"trump_lost" + 0.026*"help" + 0.025*"yet" + 0.024*"mark_meadows" + 0.024*"decided" + 0.024*"rudy_giuliani" + 0.024*"hatch_coup" + 0.024*"new_video"
2022-10-27 16:57:46,924 : INFO : topic #3 (0.250): 0.075*"called" + 0.075*"mitch_mcconnell" + 0.075*"knew_trump" + 0.075*"responsible" + 0.075*"kevin_mccarthy" + 0.075*"backed" + 0.020*"thing" + 0.018*"sec" + 0.018*"person" + 0.018*"national"


[(0,
  '0.046*"knew" + 0.030*"trump" + 0.015*"democracy" + 0.011*"lost" + 0.010*"american_people" + 0.009*"break" + 0.009*"trump_tried" + 0.009*"accept" + 0.009*"loss" + 0.009*"decided"'),
 (1,
  '0.050*"trump" + 0.039*"subpoena_donald" + 0.039*"testify" + 0.037*"voted_unanimously" + 0.037*"demands" + 0.037*"oath_democracy" + 0.037*"history_deserves" + 0.026*"violence" + 0.025*"thcomm" + 0.024*"love"'),
 (2,
  '0.038*"trump" + 0.029*"roger_stone" + 0.028*"trump_lost" + 0.026*"help" + 0.025*"yet" + 0.024*"mark_meadows" + 0.024*"decided" + 0.024*"rudy_giuliani" + 0.024*"hatch_coup" + 0.024*"new_video"'),
 (3,
  '0.075*"called" + 0.075*"mitch_mcconnell" + 0.075*"knew_trump" + 0.075*"responsible" + 0.075*"kevin_mccarthy" + 0.075*"backed" + 0.020*"thing" + 0.018*"sec" + 0.018*"person" + 0.018*"national"')]

In [23]:
# Visualize the topics with pyLDAvis (cell currently disabled; uncomment the code lines to run).
# NOTE(review): this notebook imports `pyLDAvis.gensim_models`, so the prepare()
# call below is written against that module rather than `pyLDAvis.gensim`.
# Confirm that `corpus2` and `id2word` are defined earlier before re-enabling.
# pyLDAvis.enable_notebook()
# vis = pyLDAvis.gensim_models.prepare(lda_model, corpus2, id2word)
# vis