### LDA Probabilistic Modeling - LDA uses Gensim
- Uses gensim simple pre-processing for tokenization

In [1]:
import pandas as pd
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize, RegexpTokenizer
from nltk.util import ngrams
nltk.download('stopwords')
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
import re

import gensim
from gensim import corpora, models, similarities, matutils
from gensim.models.phrases import Phrases, ENGLISH_CONNECTOR_WORDS
from gensim.utils import simple_preprocess
from gensim.models import CoherenceModel
import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

# spacy for lemmatization
import spacy

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/jennihawk/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [5]:
import pyLDAvis
import pyLDAvis.gensim_models  # don't skip this
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Tweets were collected at 1:15 Pacific Time on 10/14/22.
# NOTE(review): this is an absolute, machine-specific path; consider a path
# relative to the project directory so the notebook runs elsewhere.
# index_col=0 reuses the row index that was saved into the CSV instead of
# loading it as a spurious "Unnamed: 0" column.
tweets = pd.read_csv(
    '/Users/jennihawk/Documents/Data Science/NLP_Unsupervised Learning/Project_NLP/TweetBatch3.csv',
    index_col=0,
)
tweets

Unnamed: 0,text,cleaned
0,@ReallyAmerican1 #Roevember and\n#ForThePeople...,roevember and forthepeople and votebluein2022...
1,RT @sandibachom: IS THIS THING ON???!!This is ...,rt is this thing on this is pathetic acting se...
2,RT @sandibachom: IS THIS THING ON???!!This is ...,rt is this thing on this is pathetic acting se...
3,RT @tleehumphrey: Today is the beginning of th...,rt today is the beginning of the inquiry into ...
4,RT @AdamKinzinger: Mitch McConnell.\nKevin McC...,rt mitch mcconnell kevin mccarthy they both kn...
...,...,...
34988,RT @Adrian_Fontes: The January 6th committee j...,rt fontes the january 6th committee just concl...
34989,#January6thCommitteeHearings and everyone runn...,january6thcommitteehearings and everyone runn...
34990,RT @sandibachom: IS THIS THING ON???!!This is ...,rt is this thing on this is pathetic acting se...
34991,So they are gonna subpoena Trump I am guessing...,so they are gonna subpoena trump am guessing t...


### Stop Words 

In [3]:
# NLTK's English stop-word list plus corpus-specific tokens to drop.
# NOTE(review): the tokenizer used later appears to strip digits
# ('votebluein2022' shows up as 'votebluein'), so the digit-bearing entry
# 'january6thcommitteehearings' probably never matches a token, while the
# fragment 'th' (from "6th") is not filtered -- confirm and adjust the list.
stop_words = stopwords.words('english') + [
    'rt', 'january', 'january6thcommitteehearings', 'ja',
]

### Create list of initially cleaned tweets
- This section is also the place to add any further regex-based cleaning (i.e. `data = <regex cleaning applied to the tweets>`).

In [4]:
# Plain Python list holding the pre-cleaned text of every tweet.
data = tweets['cleaned'].tolist()

### Gensim tokenization via simple_preprocess( )
- https://www.machinelearningplus.com/nlp/topic-modeling-gensim-python/#9createbigramandtrigrammodels

In [8]:
def sent_to_words(sentences):
    """Lazily tokenize each document with gensim's ``simple_preprocess``.

    Every item of ``sentences`` is converted with ``str()`` first, so
    non-string values are tokenized by their string form. ``deacc=True``
    strips accent marks from the tokens; punctuation is already discarded
    by the tokenizer itself.

    Yields:
        One list of tokens per input document, in input order.
    """
    for raw_text in sentences:
        tokens = gensim.utils.simple_preprocess(str(raw_text), deacc=True)
        yield tokens


# sent_to_words is a generator function, so materialise its output into a
# list that can be indexed and iterated over more than once.
data_words = [tokens for tokens in sent_to_words(data)]

In [9]:
# Sanity check: data_words should be a list, not a generator.
print(type(data_words))

<class 'list'>


In [10]:
# Peek at the tokens of the second tweet (stop words such as 'rt' are still present here).
print(data_words[1])

['rt', 'is', 'this', 'thing', 'on', 'this', 'is', 'pathetic', 'acting', 'sec', 'of', 'defense', 'chris', 'miller', 'was', 'the', 'only', 'person', 'who', 'can', 'deploy', 'the', 'national']


### Phrases Model Arguments
- Radim Řehůřek's code example: https://radimrehurek.com/gensim/models/phrases.html
- The first argument takes a sequence of token lists (one list of tokens per sentence/document).
- `min_count` = minimum number of times a word combination needs to be seen to be considered a bigram.
- `threshold` = gensim detects a bigram when the scoring function for the two words exceeds this threshold. Scoring is a big topic in itself; see:
https://datascience.stackexchange.com/questions/25524/how-does-phrases-in-gensim-work
- `connector_words` (set of str, optional) = set of words that may be included within a phrase without affecting its scoring. No phrase can start or end with a connector word; a phrase may contain any number of connector words in the middle.
- `connector_words=phrases.ENGLISH_CONNECTOR_WORDS` causes phrases to include common English articles, prepositions and conjunctions, such as bank_of_america or eye_of_the_beholder.

#### Build Bigram Model

In [11]:
# Train the phrase (bigram) detector on the tokenized tweets; the input must
# be a sequence of token lists (e.g. an iterable or a generator).
# NOTE(review): min_count=1 and threshold=1 look very permissive -- even
# 'premeditated months' is joined in the example output -- confirm this is intended.
bigram_mod = Phrases(
    sentences=data_words,
    connector_words=ENGLISH_CONNECTOR_WORDS,
    threshold=1,
    min_count=1,
)

2022-10-25 16:50:22,085 : INFO : collecting all words and their counts
2022-10-25 16:50:22,086 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2022-10-25 16:50:22,289 : INFO : PROGRESS: at sentence #10000, processed 190666 words and 25460 word types
2022-10-25 16:50:22,492 : INFO : PROGRESS: at sentence #20000, processed 379448 words and 40019 word types
2022-10-25 16:50:22,696 : INFO : PROGRESS: at sentence #30000, processed 571741 words and 51604 word types
2022-10-25 16:50:22,799 : INFO : collected 55276 token types (unigram + bigrams) from a corpus of 666324 words and 34993 sentences
2022-10-25 16:50:22,800 : INFO : merged Phrases<55276 vocab, min_count=1, threshold=1, max_vocab_size=40000000>
2022-10-25 16:50:22,801 : INFO : Phrases lifecycle event {'msg': 'built Phrases<55276 vocab, min_count=1, threshold=1, max_vocab_size=40000000> in 0.72s', 'datetime': '2022-10-25T16:50:22.801546', 'gensim': '4.1.2', 'python': '3.9.7 (default, Sep 16 2021, 08:50:36) \n[Cl

#### EDA: See how the trained model is working on unseen sentences

In [12]:
# A new, already-tokenized sentence for trying out the trained phrases model.
new_sentence = (
    'tfgs big lie to steal the election was '
    'premeditated months before the election'
).split()

In [13]:
# Apply the trained phrases model to the new sentence.
# Tokens the model scores as a phrase (e.g. 'premeditated', 'months' in the
# output below) are joined into a single "phrase" token with the `_` delimiter.
bigram_mod[new_sentence]

['tfgs',
 'big_lie',
 'to',
 'steal_the_election',
 'was',
 'premeditated_months',
 'before_the_election']

### Define Functions: Remove Stopwords and Make Bigrams
- A lemmatization helper is also included (currently commented out) for when it is needed.

In [16]:
def remove_stopwords(texts):
    """Tokenize each document and drop every token found in ``stop_words``.

    Args:
        texts: iterable of documents. Each document is passed through
            ``str()`` and ``simple_preprocess`` before filtering, so raw
            strings and lists of tokens are both accepted.

    Returns:
        A list with one list of kept tokens per input document, in order.
    """
    # Build the lookup once per call: set membership is O(1), whereas testing
    # against the ``stop_words`` list rescans it for every token.
    stop_set = set(stop_words)
    # NOTE(review): for an already-tokenized document, str(doc) is the list's
    # repr, which simple_preprocess then tokenizes again -- confirm that this
    # round trip is intended rather than filtering the tokens directly.
    return [
        [word for word in simple_preprocess(str(doc)) if word not in stop_set]
        for doc in texts
    ]

def make_bigrams(texts):
    """Run every tokenized document through the trained ``bigram_mod`` model.

    Returns a list holding, for each document in ``texts``, the result of
    ``bigram_mod[doc]``.
    """
    merged_docs = []
    for token_list in texts:
        merged_docs.append(bigram_mod[token_list])
    return merged_docs

# def lemmatization(texts, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV']):
#     """https://spacy.io/api/annotation"""
#     texts_out = []
#     for sent in texts:
#         doc = nlp(" ".join(sent)) 
#         texts_out.append([token.lemma_ for token in doc if token.pos_ in allowed_postags])
#     return texts_out

### Call the functions in order

In [17]:
# Remove Stop Words
# NOTE(review): data_words is already tokenized; remove_stopwords calls
# simple_preprocess(str(doc)) on each token list again before filtering.
data_words_nostops = remove_stopwords(data_words)

# Form Bigrams
# NOTE(review): bigram_mod was trained on data_words (stop words still present)
# but is applied here to text with stop words removed -- confirm that this
# difference between training and application input is intended.
data_words_bigrams = make_bigrams(data_words_nostops)

# Lemmatization is currently disabled; the lines below are kept for reference.
# Initialize spacy 'en' model, keeping only tagger component (for efficiency)
# python3 -m spacy download en
#nlp = spacy.load('en', disable=['parser', 'ner'])
#nlp = spacy.load("en_core_web_sm")

# Do lemmatization keeping only noun, adj, vb, adv
#data_lemmatized = lemmatization(data_words_bigrams, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV'])

In [18]:
# Show the first processed document (stop words removed, bigrams merged).
print(data_words_bigrams[:1])

[['roevember', 'forthepeople', 'votebluein', 'oathbreakermaga']]


### Prepare the Inputs for Modeling
- There are two main inputs to the LDA topic model:
  - the dictionary (`id2word`), and
  - the corpus.

In [19]:
# Build the gensim Dictionary: the mapping between each unique token in the
# bigram-merged documents and its integer id.
id2word = corpora.Dictionary(documents=data_words_bigrams)

2022-10-25 16:57:09,040 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2022-10-25 16:57:09,165 : INFO : adding document #10000 to Dictionary(6011 unique tokens: ['forthepeople', 'oathbreakermaga', 'roevember', 'votebluein', 'defense_chris']...)
2022-10-25 16:57:09,285 : INFO : adding document #20000 to Dictionary(8721 unique tokens: ['forthepeople', 'oathbreakermaga', 'roevember', 'votebluein', 'defense_chris']...)
2022-10-25 16:57:09,409 : INFO : adding document #30000 to Dictionary(10585 unique tokens: ['forthepeople', 'oathbreakermaga', 'roevember', 'votebluein', 'defense_chris']...)
2022-10-25 16:57:09,476 : INFO : built Dictionary(11177 unique tokens: ['forthepeople', 'oathbreakermaga', 'roevember', 'votebluein', 'defense_chris']...) from 34993 documents (total 247325 corpus positions)
2022-10-25 16:57:09,477 : INFO : Dictionary lifecycle event {'msg': "built Dictionary(11177 unique tokens: ['forthepeople', 'oathbreakermaga', 'roevember', 'votebluein', 'defense_chr

In [22]:
# Look up the token stored under id 1 in the dictionary.
id2word[1]

'oathbreakermaga'

#### Create bag of words corpus
- For every document, the corpus contains the id of each word in it together with that word's frequency in the document: https://www.tutorialspoint.com/gensim/gensim_creating_a_bag_of_words_corpus.htm
- This is used as input into LDA model

In [23]:
# Bag-of-words corpus: one doc2bow() result per processed document.
corpus2 = list(map(id2word.doc2bow, data_words_bigrams))

In [28]:
# View the first document of the corpus. Gensim creates a unique id for each
# word; each tuple is (word id, word frequency).
# This is used as the input by the LDA model.
print(corpus2[:1])

[[(0, 1), (1, 1), (2, 1), (3, 1)]]


In [25]:
# To see which word a given id corresponds to, pass the id as a key to the dictionary.
id2word[0]

'forthepeople'

In [26]:
# Human-readable view of the first document: (word, frequency) pairs
# instead of (word id, frequency) pairs.
[
    [(id2word[token_id], count) for token_id, count in bow]
    for bow in corpus2[:1]
]

[[('forthepeople', 1),
  ('oathbreakermaga', 1),
  ('roevember', 1),
  ('votebluein', 1)]]

### Jump back on track: Build the topic model
- More on the parameters https://miningthedetails.com/blog/python/lda/GensimLDA/
- passes: the total number of training passes over the corpus.
- random_state: ({np.random.RandomState, int}, optional) – Either a randomState object or a seed to generate one. Useful for reproducibility.

In [30]:
# Train a 4-topic LDA model on the bag-of-words corpus.
# random_state is fixed so the run is reproducible; passes=10 means ten
# training passes over the corpus.
# NOTE: with logging configured at INFO level, this cell prints a long
# stream of progress and topic lines while training.
lda_model = gensim.models.LdaModel(
    corpus=corpus2,
    num_topics=4,
    id2word=id2word,
    passes=10,
    random_state=100,
)

2022-10-25 17:43:12,323 : INFO : using symmetric alpha at 0.25
2022-10-25 17:43:12,325 : INFO : using symmetric eta at 0.25
2022-10-25 17:43:12,330 : INFO : using serial LDA version on this node
2022-10-25 17:43:12,341 : INFO : running online (multi-pass) LDA training, 4 topics, 10 passes over the supplied corpus of 34993 documents, updating model once every 2000 documents, evaluating perplexity every 20000 documents, iterating 50x with a convergence threshold of 0.001000
2022-10-25 17:43:12,342 : INFO : PROGRESS: pass 0, at document #2000/34993
2022-10-25 17:43:13,369 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-25 17:43:13,375 : INFO : topic #0 (0.250): 0.045*"knew" + 0.019*"donald_trump" + 0.016*"th" + 0.015*"trump" + 0.014*"nancy_pelosi" + 0.012*"tell" + 0.012*"otherwise" + 0.012*"let_cnn" + 0.012*"hates_steve" + 0.012*"clear"
2022-10-25 17:43:13,376 : INFO : topic #1 (0.250): 0.029*"th" + 0.028*"trump" + 0.020*"nancy_pelosi" + 0.019*"want" +

2022-10-25 17:43:15,197 : INFO : topic #1 (0.250): 0.050*"trump" + 0.023*"th" + 0.021*"want" + 0.020*"testify" + 0.020*"subpoena_donald" + 0.019*"voted_unanimously" + 0.018*"demands" + 0.018*"history_deserves" + 0.018*"oath_democracy" + 0.014*"thanks"
2022-10-25 17:43:15,199 : INFO : topic #2 (0.250): 0.064*"violence" + 0.040*"thcomm" + 0.040*"love" + 0.040*"maga_gop" + 0.039*"correct" + 0.039*"overthrowing_democracy" + 0.039*"mark_hamill" + 0.039*"country_without_creating" + 0.037*"trump" + 0.024*"says"
2022-10-25 17:43:15,200 : INFO : topic #3 (0.250): 0.079*"called" + 0.079*"kevin_mccarthy" + 0.078*"responsible" + 0.078*"mitch_mcconnell" + 0.078*"knew_trump" + 0.078*"backed" + 0.029*"decided" + 0.018*"trump_lost" + 0.018*"roger_stone" + 0.018*"yet"
2022-10-25 17:43:15,200 : INFO : topic diff=0.348380, rho=0.377964
2022-10-25 17:43:15,202 : INFO : PROGRESS: pass 0, at document #16000/34993
2022-10-25 17:43:15,468 : INFO : merging changes from 2000 documents into a model of 34993 docu

2022-10-25 17:43:17,288 : INFO : topic diff=0.264663, rho=0.277350
2022-10-25 17:43:17,289 : INFO : PROGRESS: pass 0, at document #28000/34993
2022-10-25 17:43:17,570 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-25 17:43:17,574 : INFO : topic #1 (0.250): 0.051*"trump" + 0.025*"subpoena_donald" + 0.025*"testify" + 0.024*"voted_unanimously" + 0.024*"demands" + 0.024*"history_deserves" + 0.024*"oath_democracy" + 0.010*"th" + 0.008*"going" + 0.008*"social_media"
2022-10-25 17:43:17,575 : INFO : topic #2 (0.250): 0.052*"violence" + 0.038*"trump" + 0.024*"thcomm" + 0.024*"love" + 0.024*"th_attack" + 0.023*"trump_summoned" + 0.023*"goaded" + 0.023*"author" + 0.023*"maga_gop" + 0.023*"rioters_trump"
2022-10-25 17:43:17,577 : INFO : topic #3 (0.250): 0.070*"called" + 0.070*"knew_trump" + 0.070*"responsible" + 0.070*"mitch_mcconnell" + 0.070*"kevin_mccarthy" + 0.069*"backed" + 0.035*"decided" + 0.026*"roger_stone" + 0.025*"trump_lost" + 0.025*"help"
2022-1

2022-10-25 17:43:19,415 : INFO : topic #2 (0.250): 0.061*"violence" + 0.035*"trump" + 0.035*"thcomm" + 0.033*"love" + 0.033*"maga_gop" + 0.033*"correct" + 0.033*"overthrowing_democracy" + 0.033*"mark_hamill" + 0.033*"country_without_creating" + 0.023*"trump_summoned"
2022-10-25 17:43:19,415 : INFO : topic #3 (0.250): 0.077*"called" + 0.077*"responsible" + 0.077*"mitch_mcconnell" + 0.077*"knew_trump" + 0.077*"kevin_mccarthy" + 0.077*"backed" + 0.028*"decided" + 0.019*"roger_stone" + 0.018*"trump_lost" + 0.018*"help"
2022-10-25 17:43:19,416 : INFO : topic diff=0.256456, rho=0.226476
2022-10-25 17:43:19,418 : INFO : PROGRESS: pass 1, at document #6000/34993
2022-10-25 17:43:19,676 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-25 17:43:19,681 : INFO : topic #0 (0.250): 0.046*"knew" + 0.015*"trump" + 0.013*"nancy_pelosi" + 0.012*"would" + 0.011*"happened" + 0.010*"testified" + 0.009*"th" + 0.009*"afraid" + 0.009*"hours" + 0.009*"donald_trump"
2022-10-2

2022-10-25 17:43:21,210 : INFO : topic #0 (0.250): 0.049*"knew" + 0.026*"trump" + 0.010*"election" + 0.009*"donald_trump" + 0.009*"happened" + 0.008*"lost" + 0.008*"testified" + 0.008*"held" + 0.008*"violent_armed" + 0.007*"afraid"
2022-10-25 17:43:21,211 : INFO : topic #1 (0.250): 0.050*"trump" + 0.021*"testify" + 0.021*"subpoena_donald" + 0.020*"voted_unanimously" + 0.020*"demands" + 0.020*"history_deserves" + 0.020*"oath_democracy" + 0.016*"th" + 0.013*"want" + 0.009*"going"
2022-10-25 17:43:21,212 : INFO : topic #2 (0.250): 0.065*"violence" + 0.040*"thcomm" + 0.040*"love" + 0.039*"maga_gop" + 0.038*"correct" + 0.038*"overthrowing_democracy" + 0.038*"mark_hamill" + 0.038*"country_without_creating" + 0.037*"trump" + 0.022*"says"
2022-10-25 17:43:21,213 : INFO : topic #3 (0.250): 0.086*"called" + 0.084*"responsible" + 0.084*"mitch_mcconnell" + 0.084*"kevin_mccarthy" + 0.084*"knew_trump" + 0.084*"backed" + 0.029*"decided" + 0.019*"roger_stone" + 0.018*"trump_lost" + 0.018*"help"
2022-1

2022-10-25 17:43:23,122 : INFO : topic #3 (0.250): 0.071*"called" + 0.070*"knew_trump" + 0.070*"responsible" + 0.070*"mitch_mcconnell" + 0.070*"kevin_mccarthy" + 0.070*"backed" + 0.034*"decided" + 0.026*"roger_stone" + 0.025*"trump_lost" + 0.024*"help"
2022-10-25 17:43:23,123 : INFO : topic diff=0.208833, rho=0.226476
2022-10-25 17:43:23,124 : INFO : PROGRESS: pass 1, at document #32000/34993
2022-10-25 17:43:23,379 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-25 17:43:23,384 : INFO : topic #0 (0.250): 0.048*"knew" + 0.022*"trump" + 0.016*"happened" + 0.014*"testified" + 0.014*"afraid" + 0.014*"violent_armed" + 0.014*"hours" + 0.014*"ev" + 0.014*"trump_get" + 0.013*"biggest_audience"
2022-10-25 17:43:23,385 : INFO : topic #1 (0.250): 0.055*"trump" + 0.025*"testify" + 0.025*"subpoena_donald" + 0.024*"voted_unanimously" + 0.023*"demands" + 0.023*"history_deserves" + 0.023*"oath_democracy" + 0.010*"going" + 0.009*"th" + 0.007*"want"
2022-10-25 17:43

2022-10-25 17:43:25,220 : INFO : topic #1 (0.250): 0.045*"trump" + 0.023*"th" + 0.020*"want" + 0.018*"testify" + 0.017*"subpoena_donald" + 0.017*"voted_unanimously" + 0.016*"thanks" + 0.016*"demands" + 0.016*"oath_democracy" + 0.016*"history_deserves"
2022-10-25 17:43:25,221 : INFO : topic #2 (0.250): 0.069*"violence" + 0.045*"thcomm" + 0.045*"love" + 0.044*"maga_gop" + 0.044*"correct" + 0.044*"country_without_creating" + 0.044*"overthrowing_democracy" + 0.044*"mark_hamill" + 0.032*"trump" + 0.020*"trump_summoned"
2022-10-25 17:43:25,222 : INFO : topic #3 (0.250): 0.083*"responsible" + 0.083*"called" + 0.083*"kevin_mccarthy" + 0.083*"mitch_mcconnell" + 0.083*"knew_trump" + 0.083*"backed" + 0.026*"decided" + 0.016*"roger_stone" + 0.015*"thing" + 0.015*"trump_lost"
2022-10-25 17:43:25,224 : INFO : topic diff=0.218716, rho=0.220882
2022-10-25 17:43:25,225 : INFO : PROGRESS: pass 2, at document #10000/34993
2022-10-25 17:43:25,489 : INFO : merging changes from 2000 documents into a model o

2022-10-25 17:43:27,351 : INFO : topic diff=0.212509, rho=0.220882
2022-10-25 17:43:27,352 : INFO : PROGRESS: pass 2, at document #22000/34993
2022-10-25 17:43:27,599 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-25 17:43:27,603 : INFO : topic #0 (0.250): 0.047*"knew" + 0.026*"trump" + 0.011*"happened" + 0.010*"violent_armed" + 0.009*"testified" + 0.009*"donald_trump" + 0.009*"election" + 0.009*"act" + 0.009*"hours" + 0.009*"afraid"
2022-10-25 17:43:27,604 : INFO : topic #1 (0.250): 0.052*"trump" + 0.022*"subpoena_donald" + 0.022*"testify" + 0.021*"voted_unanimously" + 0.021*"demands" + 0.021*"oath_democracy" + 0.021*"history_deserves" + 0.013*"th" + 0.011*"going" + 0.010*"want"
2022-10-25 17:43:27,605 : INFO : topic #2 (0.250): 0.063*"violence" + 0.041*"trump" + 0.035*"thcomm" + 0.035*"love" + 0.034*"maga_gop" + 0.033*"correct" + 0.033*"country_without_creating" + 0.033*"mark_hamill" + 0.033*"overthrowing_democracy" + 0.022*"th_attack"
2022-10-25

2022-10-25 17:43:29,119 : INFO : topic #3 (0.250): 0.074*"mitch_mcconnell" + 0.074*"called" + 0.074*"responsible" + 0.074*"knew_trump" + 0.074*"kevin_mccarthy" + 0.074*"backed" + 0.029*"decided" + 0.023*"roger_stone" + 0.022*"trump_lost" + 0.021*"help"
2022-10-25 17:43:29,119 : INFO : topic diff=0.158114, rho=0.220882
2022-10-25 17:43:29,345 : INFO : -6.168 per-word bound, 71.9 perplexity estimate based on a held-out corpus of 993 documents with 6779 words
2022-10-25 17:43:29,346 : INFO : PROGRESS: pass 2, at document #34993/34993
2022-10-25 17:43:29,474 : INFO : merging changes from 993 documents into a model of 34993 documents
2022-10-25 17:43:29,478 : INFO : topic #0 (0.250): 0.044*"knew" + 0.019*"trump" + 0.016*"happened" + 0.015*"testified" + 0.014*"hours" + 0.014*"afraid" + 0.014*"ev" + 0.014*"trump_get" + 0.014*"hey_trump" + 0.014*"biggest_audience"
2022-10-25 17:43:29,479 : INFO : topic #1 (0.250): 0.057*"trump" + 0.028*"subpoena_donald" + 0.028*"testify" + 0.027*"voted_unanimo

2022-10-25 17:43:31,032 : INFO : topic #1 (0.250): 0.046*"trump" + 0.022*"th" + 0.020*"want" + 0.018*"testify" + 0.017*"voted_unanimously" + 0.017*"subpoena_donald" + 0.016*"demands" + 0.016*"history_deserves" + 0.016*"oath_democracy" + 0.015*"thanks"
2022-10-25 17:43:31,033 : INFO : topic #2 (0.250): 0.072*"violence" + 0.046*"thcomm" + 0.045*"love" + 0.045*"maga_gop" + 0.044*"correct" + 0.044*"mark_hamill" + 0.044*"overthrowing_democracy" + 0.044*"country_without_creating" + 0.034*"trump" + 0.023*"says"
2022-10-25 17:43:31,034 : INFO : topic #3 (0.250): 0.083*"called" + 0.083*"responsible" + 0.083*"kevin_mccarthy" + 0.083*"mitch_mcconnell" + 0.083*"knew_trump" + 0.083*"backed" + 0.029*"decided" + 0.019*"roger_stone" + 0.017*"trump_lost" + 0.017*"help"
2022-10-25 17:43:31,035 : INFO : topic diff=0.184232, rho=0.215683
2022-10-25 17:43:31,036 : INFO : PROGRESS: pass 3, at document #14000/34993
2022-10-25 17:43:31,277 : INFO : merging changes from 2000 documents into a model of 34993 doc

2022-10-25 17:43:32,958 : INFO : topic diff=0.179911, rho=0.215683
2022-10-25 17:43:32,959 : INFO : PROGRESS: pass 3, at document #26000/34993
2022-10-25 17:43:33,205 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-25 17:43:33,209 : INFO : topic #0 (0.250): 0.050*"knew" + 0.025*"trump" + 0.013*"violent_armed" + 0.012*"happened" + 0.011*"testified" + 0.011*"act" + 0.011*"plans" + 0.011*"ins" + 0.011*"reason_law" + 0.011*"flood"
2022-10-25 17:43:33,211 : INFO : topic #1 (0.250): 0.053*"trump" + 0.025*"subpoena_donald" + 0.024*"testify" + 0.023*"voted_unanimously" + 0.023*"demands" + 0.023*"history_deserves" + 0.023*"oath_democracy" + 0.012*"going" + 0.011*"th" + 0.008*"want"
2022-10-25 17:43:33,212 : INFO : topic #2 (0.250): 0.056*"violence" + 0.037*"trump" + 0.029*"thcomm" + 0.028*"love" + 0.027*"maga_gop" + 0.027*"correct" + 0.027*"overthrowing_democracy" + 0.027*"country_without_creating" + 0.027*"mark_hamill" + 0.023*"th_attack"
2022-10-25 17:43:3

2022-10-25 17:43:34,778 : INFO : topic #2 (0.250): 0.059*"violence" + 0.039*"trump" + 0.030*"thcomm" + 0.029*"love" + 0.029*"maga_gop" + 0.028*"correct" + 0.028*"mark_hamill" + 0.028*"overthrowing_democracy" + 0.028*"country_without_creating" + 0.025*"trump_summoned"
2022-10-25 17:43:34,779 : INFO : topic #3 (0.250): 0.077*"called" + 0.077*"responsible" + 0.077*"mitch_mcconnell" + 0.077*"knew_trump" + 0.076*"kevin_mccarthy" + 0.076*"backed" + 0.028*"decided" + 0.021*"roger_stone" + 0.020*"trump_lost" + 0.019*"help"
2022-10-25 17:43:34,780 : INFO : topic diff=0.224323, rho=0.210835
2022-10-25 17:43:34,780 : INFO : PROGRESS: pass 4, at document #4000/34993
2022-10-25 17:43:35,008 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-25 17:43:35,012 : INFO : topic #0 (0.250): 0.048*"knew" + 0.016*"trump" + 0.015*"nancy_pelosi" + 0.012*"happened" + 0.011*"would" + 0.011*"testified" + 0.010*"th" + 0.010*"afraid" + 0.010*"hours" + 0.010*"ev"
2022-10-25 17:43:35

2022-10-25 17:43:36,475 : INFO : topic #0 (0.250): 0.046*"knew" + 0.024*"trump" + 0.009*"election" + 0.009*"donald_trump" + 0.009*"happened" + 0.008*"nancy_pelosi" + 0.008*"violent_armed" + 0.008*"testified" + 0.008*"act" + 0.008*"democracy"
2022-10-25 17:43:36,477 : INFO : topic #1 (0.250): 0.046*"trump" + 0.019*"th" + 0.018*"testify" + 0.018*"subpoena_donald" + 0.017*"voted_unanimously" + 0.016*"demands" + 0.016*"history_deserves" + 0.016*"oath_democracy" + 0.015*"want" + 0.010*"thanks"
2022-10-25 17:43:36,478 : INFO : topic #2 (0.250): 0.068*"violence" + 0.042*"thcomm" + 0.042*"love" + 0.041*"maga_gop" + 0.041*"correct" + 0.041*"mark_hamill" + 0.041*"overthrowing_democracy" + 0.041*"country_without_creating" + 0.037*"trump" + 0.023*"says"
2022-10-25 17:43:36,479 : INFO : topic #3 (0.250): 0.088*"called" + 0.087*"responsible" + 0.087*"kevin_mccarthy" + 0.087*"knew_trump" + 0.087*"mitch_mcconnell" + 0.086*"backed" + 0.028*"decided" + 0.019*"roger_stone" + 0.018*"trump_lost" + 0.017*"h

2022-10-25 17:43:38,310 : INFO : topic #3 (0.250): 0.073*"called" + 0.073*"knew_trump" + 0.073*"responsible" + 0.073*"mitch_mcconnell" + 0.073*"kevin_mccarthy" + 0.073*"backed" + 0.035*"decided" + 0.026*"roger_stone" + 0.024*"trump_lost" + 0.024*"help"
2022-10-25 17:43:38,311 : INFO : topic diff=0.205409, rho=0.210835
2022-10-25 17:43:38,313 : INFO : PROGRESS: pass 4, at document #30000/34993
2022-10-25 17:43:38,540 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-25 17:43:38,545 : INFO : topic #0 (0.250): 0.045*"knew" + 0.023*"trump" + 0.014*"happened" + 0.014*"violent_armed" + 0.012*"testified" + 0.012*"act" + 0.012*"plans" + 0.012*"ins" + 0.012*"reason_law" + 0.012*"flood"
2022-10-25 17:43:38,546 : INFO : topic #1 (0.250): 0.051*"trump" + 0.023*"testify" + 0.023*"subpoena_donald" + 0.022*"voted_unanimously" + 0.022*"demands" + 0.022*"history_deserves" + 0.022*"oath_democracy" + 0.011*"going" + 0.009*"th" + 0.007*"want"
2022-10-25 17:43:38,546 : IN

2022-10-25 17:43:40,498 : INFO : topic #1 (0.250): 0.046*"trump" + 0.020*"th" + 0.019*"testify" + 0.018*"subpoena_donald" + 0.017*"voted_unanimously" + 0.017*"want" + 0.017*"demands" + 0.017*"oath_democracy" + 0.017*"history_deserves" + 0.013*"thanks"
2022-10-25 17:43:40,499 : INFO : topic #2 (0.250): 0.066*"violence" + 0.040*"thcomm" + 0.039*"love" + 0.039*"maga_gop" + 0.038*"correct" + 0.038*"mark_hamill" + 0.038*"overthrowing_democracy" + 0.038*"country_without_creating" + 0.034*"trump" + 0.021*"trump_summoned"
2022-10-25 17:43:40,500 : INFO : topic #3 (0.250): 0.080*"called" + 0.080*"responsible" + 0.080*"mitch_mcconnell" + 0.080*"knew_trump" + 0.080*"kevin_mccarthy" + 0.080*"backed" + 0.028*"decided" + 0.018*"roger_stone" + 0.017*"trump_lost" + 0.017*"help"
2022-10-25 17:43:40,501 : INFO : topic diff=0.222177, rho=0.206300
2022-10-25 17:43:40,503 : INFO : PROGRESS: pass 5, at document #8000/34993
2022-10-25 17:43:40,733 : INFO : merging changes from 2000 documents into a model of 

2022-10-25 17:43:41,967 : INFO : topic diff=0.186619, rho=0.206300
2022-10-25 17:43:42,359 : INFO : -6.426 per-word bound, 86.0 perplexity estimate based on a held-out corpus of 2000 documents with 14458 words
2022-10-25 17:43:42,360 : INFO : PROGRESS: pass 5, at document #20000/34993
2022-10-25 17:43:42,587 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-25 17:43:42,591 : INFO : topic #0 (0.250): 0.050*"knew" + 0.026*"trump" + 0.010*"election" + 0.009*"donald_trump" + 0.009*"happened" + 0.009*"lost" + 0.008*"testified" + 0.008*"violent_armed" + 0.008*"hours" + 0.007*"afraid"
2022-10-25 17:43:42,593 : INFO : topic #1 (0.250): 0.049*"trump" + 0.020*"subpoena_donald" + 0.020*"testify" + 0.019*"voted_unanimously" + 0.019*"demands" + 0.019*"history_deserves" + 0.019*"oath_democracy" + 0.014*"th" + 0.011*"want" + 0.010*"going"
2022-10-25 17:43:42,594 : INFO : topic #2 (0.250): 0.066*"violence" + 0.039*"thcomm" + 0.039*"love" + 0.038*"maga_gop" + 0.037*"t

2022-10-25 17:43:44,077 : INFO : topic #2 (0.250): 0.058*"violence" + 0.041*"trump" + 0.027*"thcomm" + 0.027*"trump_summoned" + 0.027*"th_attack" + 0.027*"author" + 0.027*"goaded" + 0.027*"rioters_trump" + 0.027*"trump_excuse" + 0.026*"love"
2022-10-25 17:43:44,078 : INFO : topic #3 (0.250): 0.073*"called" + 0.073*"responsible" + 0.073*"knew_trump" + 0.073*"mitch_mcconnell" + 0.073*"kevin_mccarthy" + 0.073*"backed" + 0.031*"decided" + 0.024*"roger_stone" + 0.023*"trump_lost" + 0.022*"help"
2022-10-25 17:43:44,079 : INFO : topic diff=0.154083, rho=0.206300
2022-10-25 17:43:44,079 : INFO : PROGRESS: pass 5, at document #34000/34993
2022-10-25 17:43:44,298 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-25 17:43:44,302 : INFO : topic #0 (0.250): 0.050*"knew" + 0.021*"trump" + 0.016*"happened" + 0.014*"testified" + 0.014*"afraid" + 0.014*"hours" + 0.014*"ev" + 0.014*"trump_get" + 0.014*"biggest_audience" + 0.014*"hey_trump"
2022-10-25 17:43:44,303 : INF

2022-10-25 17:43:45,769 : INFO : topic #0 (0.250): 0.043*"knew" + 0.016*"trump" + 0.012*"nancy_pelosi" + 0.010*"happened" + 0.009*"donald_trump" + 0.009*"would" + 0.008*"testified" + 0.008*"violent_armed" + 0.008*"act" + 0.008*"afraid"
2022-10-25 17:43:45,770 : INFO : topic #1 (0.250): 0.045*"trump" + 0.024*"th" + 0.022*"want" + 0.018*"testify" + 0.018*"thanks" + 0.017*"vice_chair" + 0.017*"send_special" + 0.017*"service_leading" + 0.017*"chair_amp" + 0.017*"subpoena_donald"
2022-10-25 17:43:45,771 : INFO : topic #2 (0.250): 0.072*"violence" + 0.046*"thcomm" + 0.046*"love" + 0.046*"maga_gop" + 0.045*"correct" + 0.045*"overthrowing_democracy" + 0.045*"mark_hamill" + 0.045*"country_without_creating" + 0.034*"trump" + 0.021*"trump_summoned"
2022-10-25 17:43:45,772 : INFO : topic #3 (0.250): 0.083*"called" + 0.083*"responsible" + 0.083*"kevin_mccarthy" + 0.083*"mitch_mcconnell" + 0.083*"knew_trump" + 0.083*"backed" + 0.028*"decided" + 0.018*"roger_stone" + 0.017*"trump_lost" + 0.016*"help"

2022-10-25 17:43:47,531 : INFO : topic #3 (0.250): 0.080*"called" + 0.079*"responsible" + 0.079*"mitch_mcconnell" + 0.079*"knew_trump" + 0.079*"kevin_mccarthy" + 0.079*"backed" + 0.034*"decided" + 0.023*"roger_stone" + 0.021*"trump_lost" + 0.020*"help"
2022-10-25 17:43:47,532 : INFO : topic diff=0.171015, rho=0.202045
2022-10-25 17:43:47,533 : INFO : PROGRESS: pass 6, at document #24000/34993
2022-10-25 17:43:47,748 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-25 17:43:47,752 : INFO : topic #0 (0.250): 0.051*"knew" + 0.025*"trump" + 0.011*"happened" + 0.011*"donald_trump" + 0.010*"violent_armed" + 0.010*"testified" + 0.009*"hours" + 0.009*"afraid" + 0.009*"ev" + 0.009*"trump_get"
2022-10-25 17:43:47,753 : INFO : topic #1 (0.250): 0.051*"trump" + 0.022*"subpoena_donald" + 0.021*"testify" + 0.020*"voted_unanimously" + 0.020*"demands" + 0.020*"oath_democracy" + 0.020*"history_deserves" + 0.012*"th" + 0.011*"going" + 0.009*"want"
2022-10-25 17:43:47,

2022-10-25 17:43:49,207 : INFO : topic #1 (0.250): 0.055*"trump" + 0.027*"subpoena_donald" + 0.027*"testify" + 0.026*"voted_unanimously" + 0.025*"demands" + 0.025*"oath_democracy" + 0.025*"history_deserves" + 0.010*"going" + 0.008*"th" + 0.006*"want"
2022-10-25 17:43:49,208 : INFO : topic #2 (0.250): 0.057*"violence" + 0.044*"trump" + 0.029*"trump_summoned" + 0.028*"th_attack" + 0.028*"author" + 0.028*"goaded" + 0.028*"rioters_trump" + 0.028*"trump_excuse" + 0.025*"thcomm" + 0.023*"love"
2022-10-25 17:43:49,209 : INFO : topic #3 (0.250): 0.075*"called" + 0.075*"mitch_mcconnell" + 0.075*"knew_trump" + 0.075*"responsible" + 0.074*"kevin_mccarthy" + 0.074*"backed" + 0.028*"decided" + 0.022*"roger_stone" + 0.021*"trump_lost" + 0.020*"thing"
2022-10-25 17:43:49,210 : INFO : topic diff=0.130444, rho=0.202045
2022-10-25 17:43:49,211 : INFO : PROGRESS: pass 7, at document #2000/34993
2022-10-25 17:43:49,442 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-25

2022-10-25 17:43:50,595 : INFO : topic diff=0.162222, rho=0.198043
2022-10-25 17:43:50,596 : INFO : PROGRESS: pass 7, at document #14000/34993
2022-10-25 17:43:50,821 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-25 17:43:50,826 : INFO : topic #0 (0.250): 0.043*"knew" + 0.020*"trump" + 0.009*"nancy_pelosi" + 0.009*"violent_armed" + 0.009*"happened" + 0.009*"donald_trump" + 0.009*"act" + 0.008*"plans" + 0.008*"ins" + 0.008*"reason_law"
2022-10-25 17:43:50,827 : INFO : topic #1 (0.250): 0.047*"trump" + 0.019*"th" + 0.018*"testify" + 0.017*"subpoena_donald" + 0.017*"want" + 0.017*"voted_unanimously" + 0.016*"demands" + 0.016*"history_deserves" + 0.016*"oath_democracy" + 0.012*"thanks"
2022-10-25 17:43:50,829 : INFO : topic #2 (0.250): 0.068*"violence" + 0.043*"thcomm" + 0.042*"love" + 0.042*"maga_gop" + 0.041*"correct" + 0.041*"mark_hamill" + 0.041*"country_without_creating" + 0.041*"overthrowing_democracy" + 0.036*"trump" + 0.023*"says"
2022-10-25 1

2022-10-25 17:43:52,602 : INFO : topic #2 (0.250): 0.057*"violence" + 0.037*"trump" + 0.030*"thcomm" + 0.029*"love" + 0.028*"maga_gop" + 0.028*"correct" + 0.028*"mark_hamill" + 0.028*"overthrowing_democracy" + 0.028*"country_without_creating" + 0.023*"th_attack"
2022-10-25 17:43:52,603 : INFO : topic #3 (0.250): 0.075*"called" + 0.075*"knew_trump" + 0.074*"responsible" + 0.074*"mitch_mcconnell" + 0.074*"kevin_mccarthy" + 0.074*"backed" + 0.034*"decided" + 0.025*"roger_stone" + 0.023*"trump_lost" + 0.023*"help"
2022-10-25 17:43:52,604 : INFO : topic diff=0.157546, rho=0.198043
2022-10-25 17:43:52,604 : INFO : PROGRESS: pass 7, at document #28000/34993
2022-10-25 17:43:52,827 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-25 17:43:52,831 : INFO : topic #0 (0.250): 0.049*"knew" + 0.024*"trump" + 0.014*"happened" + 0.013*"violent_armed" + 0.012*"act" + 0.012*"testified" + 0.012*"plans" + 0.012*"ins" + 0.012*"reason_law" + 0.012*"enforcement_failed"
202

2022-10-25 17:43:54,323 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-25 17:43:54,327 : INFO : topic #0 (0.250): 0.048*"knew" + 0.016*"trump" + 0.015*"nancy_pelosi" + 0.012*"happened" + 0.011*"testified" + 0.010*"afraid" + 0.010*"hours" + 0.010*"would" + 0.010*"ev" + 0.010*"trump_get"
2022-10-25 17:43:54,328 : INFO : topic #1 (0.250): 0.049*"trump" + 0.021*"testify" + 0.020*"subpoena_donald" + 0.020*"voted_unanimously" + 0.019*"demands" + 0.019*"history_deserves" + 0.019*"oath_democracy" + 0.015*"th" + 0.012*"want" + 0.010*"going"
2022-10-25 17:43:54,329 : INFO : topic #2 (0.250): 0.062*"violence" + 0.036*"trump" + 0.035*"thcomm" + 0.034*"love" + 0.034*"maga_gop" + 0.033*"correct" + 0.033*"mark_hamill" + 0.033*"overthrowing_democracy" + 0.033*"country_without_creating" + 0.024*"trump_summoned"
2022-10-25 17:43:54,331 : INFO : topic #3 (0.250): 0.079*"called" + 0.078*"responsible" + 0.078*"mitch_mcconnell" + 0.078*"knew_trump" + 0.078*"kevin_mccart

2022-10-25 17:43:55,699 : INFO : topic #3 (0.250): 0.087*"called" + 0.086*"responsible" + 0.086*"kevin_mccarthy" + 0.086*"knew_trump" + 0.086*"mitch_mcconnell" + 0.086*"backed" + 0.028*"decided" + 0.020*"roger_stone" + 0.018*"trump_lost" + 0.017*"help"
2022-10-25 17:43:55,700 : INFO : topic diff=0.148618, rho=0.194270
2022-10-25 17:43:55,701 : INFO : PROGRESS: pass 8, at document #18000/34993
2022-10-25 17:43:55,924 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-25 17:43:55,928 : INFO : topic #0 (0.250): 0.049*"knew" + 0.025*"trump" + 0.010*"election" + 0.009*"donald_trump" + 0.009*"happened" + 0.008*"testified" + 0.008*"lost" + 0.008*"violent_armed" + 0.008*"afraid" + 0.008*"hours"
2022-10-25 17:43:55,929 : INFO : topic #1 (0.250): 0.047*"trump" + 0.019*"testify" + 0.019*"subpoena_donald" + 0.018*"voted_unanimously" + 0.018*"demands" + 0.018*"oath_democracy" + 0.018*"history_deserves" + 0.015*"th" + 0.013*"want" + 0.010*"going"
2022-10-25 17:43:55

2022-10-25 17:43:58,165 : INFO : topic #1 (0.250): 0.050*"trump" + 0.023*"testify" + 0.022*"subpoena_donald" + 0.022*"voted_unanimously" + 0.021*"demands" + 0.021*"history_deserves" + 0.021*"oath_democracy" + 0.011*"going" + 0.010*"th" + 0.007*"want"
2022-10-25 17:43:58,167 : INFO : topic #2 (0.250): 0.058*"violence" + 0.039*"trump" + 0.029*"thcomm" + 0.028*"love" + 0.027*"maga_gop" + 0.027*"correct" + 0.027*"mark_hamill" + 0.027*"overthrowing_democracy" + 0.027*"country_without_creating" + 0.025*"th_attack"
2022-10-25 17:43:58,168 : INFO : topic #3 (0.250): 0.072*"called" + 0.072*"knew_trump" + 0.072*"responsible" + 0.072*"mitch_mcconnell" + 0.072*"kevin_mccarthy" + 0.072*"backed" + 0.034*"decided" + 0.026*"roger_stone" + 0.024*"trump_lost" + 0.024*"help"
2022-10-25 17:43:58,169 : INFO : topic diff=0.163058, rho=0.194270
2022-10-25 17:43:58,170 : INFO : PROGRESS: pass 8, at document #32000/34993
2022-10-25 17:43:58,388 : INFO : merging changes from 2000 documents into a model of 34993

2022-10-25 17:43:59,637 : INFO : topic diff=0.200464, rho=0.190705
2022-10-25 17:43:59,638 : INFO : PROGRESS: pass 9, at document #8000/34993
2022-10-25 17:43:59,850 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-25 17:43:59,854 : INFO : topic #0 (0.250): 0.045*"knew" + 0.015*"trump" + 0.013*"nancy_pelosi" + 0.010*"would" + 0.010*"happened" + 0.009*"donald_trump" + 0.009*"testified" + 0.009*"setup" + 0.008*"hours" + 0.008*"afraid"
2022-10-25 17:43:59,856 : INFO : topic #1 (0.250): 0.045*"trump" + 0.020*"th" + 0.018*"testify" + 0.018*"want" + 0.017*"subpoena_donald" + 0.016*"voted_unanimously" + 0.016*"demands" + 0.016*"history_deserves" + 0.016*"oath_democracy" + 0.014*"thanks"
2022-10-25 17:43:59,857 : INFO : topic #2 (0.250): 0.070*"violence" + 0.045*"thcomm" + 0.044*"love" + 0.044*"maga_gop" + 0.043*"correct" + 0.043*"mark_hamill" + 0.043*"overthrowing_democracy" + 0.043*"country_without_creating" + 0.032*"trump" + 0.021*"trump_summoned"
2022-10

2022-10-25 17:44:01,621 : INFO : topic #2 (0.250): 0.066*"violence" + 0.039*"thcomm" + 0.039*"love" + 0.038*"maga_gop" + 0.037*"correct" + 0.037*"trump" + 0.037*"overthrowing_democracy" + 0.037*"mark_hamill" + 0.037*"country_without_creating" + 0.022*"says"
2022-10-25 17:44:01,622 : INFO : topic #3 (0.250): 0.083*"called" + 0.082*"responsible" + 0.082*"mitch_mcconnell" + 0.082*"kevin_mccarthy" + 0.082*"knew_trump" + 0.082*"backed" + 0.031*"decided" + 0.021*"roger_stone" + 0.019*"trump_lost" + 0.018*"help"
2022-10-25 17:44:01,623 : INFO : topic diff=0.171304, rho=0.190705
2022-10-25 17:44:01,624 : INFO : PROGRESS: pass 9, at document #22000/34993
2022-10-25 17:44:01,840 : INFO : merging changes from 2000 documents into a model of 34993 documents
2022-10-25 17:44:01,844 : INFO : topic #0 (0.250): 0.048*"knew" + 0.025*"trump" + 0.011*"happened" + 0.010*"violent_armed" + 0.009*"testified" + 0.009*"donald_trump" + 0.009*"election" + 0.009*"act" + 0.009*"hours" + 0.009*"afraid"
2022-10-25 17

2022-10-25 17:44:03,214 : INFO : topic #1 (0.250): 0.054*"trump" + 0.025*"subpoena_donald" + 0.025*"testify" + 0.024*"voted_unanimously" + 0.023*"demands" + 0.023*"history_deserves" + 0.023*"oath_democracy" + 0.010*"going" + 0.008*"th" + 0.007*"want"
2022-10-25 17:44:03,215 : INFO : topic #2 (0.250): 0.057*"violence" + 0.042*"trump" + 0.027*"trump_summoned" + 0.027*"th_attack" + 0.027*"author" + 0.027*"goaded" + 0.027*"rioters_trump" + 0.027*"trump_excuse" + 0.026*"thcomm" + 0.025*"love"
2022-10-25 17:44:03,216 : INFO : topic #3 (0.250): 0.075*"called" + 0.075*"mitch_mcconnell" + 0.075*"responsible" + 0.074*"knew_trump" + 0.074*"kevin_mccarthy" + 0.074*"backed" + 0.030*"decided" + 0.023*"roger_stone" + 0.022*"trump_lost" + 0.021*"help"
2022-10-25 17:44:03,216 : INFO : topic diff=0.128791, rho=0.190705
2022-10-25 17:44:03,413 : INFO : -6.146 per-word bound, 70.8 perplexity estimate based on a held-out corpus of 993 documents with 6779 words
2022-10-25 17:44:03,414 : INFO : PROGRESS: pas

In [31]:
# Show the top ten keywords for each topic together with each keyword's weight.
# The weights reflect how important a keyword is to that topic.
# print_topics() returns a list of (topic_id, 'weight*"keyword" + ...') tuples, which the
# notebook displays; with INFO logging enabled it also writes each topic to the log.
lda_model.print_topics()

2022-10-25 17:45:32,331 : INFO : topic #0 (0.250): 0.045*"knew" + 0.019*"trump" + 0.016*"happened" + 0.014*"testified" + 0.014*"hours" + 0.014*"afraid" + 0.014*"ev" + 0.014*"trump_get" + 0.013*"hey_trump" + 0.013*"biggest_audience"
2022-10-25 17:45:32,332 : INFO : topic #1 (0.250): 0.055*"trump" + 0.026*"subpoena_donald" + 0.026*"testify" + 0.025*"voted_unanimously" + 0.025*"demands" + 0.025*"history_deserves" + 0.025*"oath_democracy" + 0.010*"going" + 0.008*"th" + 0.006*"want"
2022-10-25 17:45:32,334 : INFO : topic #2 (0.250): 0.058*"violence" + 0.044*"trump" + 0.029*"trump_summoned" + 0.028*"th_attack" + 0.028*"author" + 0.028*"goaded" + 0.028*"rioters_trump" + 0.028*"trump_excuse" + 0.025*"thcomm" + 0.024*"love"
2022-10-25 17:45:32,335 : INFO : topic #3 (0.250): 0.075*"called" + 0.075*"mitch_mcconnell" + 0.075*"knew_trump" + 0.075*"responsible" + 0.074*"kevin_mccarthy" + 0.074*"backed" + 0.028*"decided" + 0.022*"roger_stone" + 0.021*"trump_lost" + 0.020*"help"


[(0,
  '0.045*"knew" + 0.019*"trump" + 0.016*"happened" + 0.014*"testified" + 0.014*"hours" + 0.014*"afraid" + 0.014*"ev" + 0.014*"trump_get" + 0.013*"hey_trump" + 0.013*"biggest_audience"'),
 (1,
  '0.055*"trump" + 0.026*"subpoena_donald" + 0.026*"testify" + 0.025*"voted_unanimously" + 0.025*"demands" + 0.025*"history_deserves" + 0.025*"oath_democracy" + 0.010*"going" + 0.008*"th" + 0.006*"want"'),
 (2,
  '0.058*"violence" + 0.044*"trump" + 0.029*"trump_summoned" + 0.028*"th_attack" + 0.028*"author" + 0.028*"goaded" + 0.028*"rioters_trump" + 0.028*"trump_excuse" + 0.025*"thcomm" + 0.024*"love"'),
 (3,
  '0.075*"called" + 0.075*"mitch_mcconnell" + 0.075*"knew_trump" + 0.075*"responsible" + 0.074*"kevin_mccarthy" + 0.074*"backed" + 0.028*"decided" + 0.022*"roger_stone" + 0.021*"trump_lost" + 0.020*"help"')]

In [7]:
# # Visualize the topics (currently disabled)
# # NOTE(review): this notebook imports pyLDAvis.gensim_models (not pyLDAvis.gensim),
# # so the prepare() call below uses that module name.
# # corpus2 and id2word are assumed to be defined in earlier cells -- TODO confirm
# # before re-enabling this cell.
# pyLDAvis.enable_notebook()
# vis = pyLDAvis.gensim_models.prepare(lda_model, corpus2, id2word)
# vis