# **BERTopic Modeling**
- Starts the Intrusion Methodology
- BERTopic + KeyBERTInspired()

# Imports

In [11]:
import pandas as pd

In [113]:
import random

In [12]:
from bertopic import BERTopic

# Data

In [13]:
# NOTE: absolute, machine-specific location of the tweet export; adjust when running elsewhere.
tweets_csv_path = '/Users/jennihawk/Documents/Data Science Projects/Project_NLP/TweetBatch3.csv'
tweets = pd.read_csv(tweets_csv_path)

In [14]:
tweets.head()

Unnamed: 0,text,cleaned
0,@ReallyAmerican1 #Roevember and\n#ForThePeople...,roevember and forthepeople and votebluein2022...
1,RT @sandibachom: IS THIS THING ON???!!This is ...,rt is this thing on this is pathetic acting se...
2,RT @sandibachom: IS THIS THING ON???!!This is ...,rt is this thing on this is pathetic acting se...
3,RT @tleehumphrey: Today is the beginning of th...,rt today is the beginning of the inquiry into ...
4,RT @AdamKinzinger: Mitch McConnell.\nKevin McC...,rt mitch mcconnell kevin mccarthy they both kn...


In [15]:
# Discard the 'cleaned' column so only the raw 'text' column remains.
tweets = tweets.drop(columns=['cleaned'])

In [16]:
tweets.head()

Unnamed: 0,text
0,@ReallyAmerican1 #Roevember and\n#ForThePeople...
1,RT @sandibachom: IS THIS THING ON???!!This is ...
2,RT @sandibachom: IS THIS THING ON???!!This is ...
3,RT @tleehumphrey: Today is the beginning of th...
4,RT @AdamKinzinger: Mitch McConnell.\nKevin McC...


In [17]:
tweets.shape

(34993, 1)

In [20]:
# Collapse tweets with identical text, keeping the first occurrence of each.
tweets = tweets.drop_duplicates(subset=["text"], keep='first')

In [21]:
tweets.shape

(5333, 1)

In [26]:
# Plain Python list of the tweet strings, in DataFrame row order
tweet_list = tweets["text"].to_list()

# **Topic Modeling**



## Training BERTopic

In [27]:
from umap import UMAP
from sentence_transformers import SentenceTransformer

In [28]:
# A fixed random_state makes the UMAP projection repeatable between runs.
umap_model = UMAP(
    n_neighbors=15,
    n_components=5,
    min_dist=0.0,
    metric='cosine',
    random_state=42,
)

# Compute the sentence embeddings up front so the same vectors stay available
# outside the model (e.g. for visualization tasks).
sentence_model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = sentence_model.encode(tweet_list, show_progress_bar=False)

# Configure BERTopic with the seeded UMAP model
topic_model = BERTopic(
    language="english",
    calculate_probabilities=True,
    verbose=False,
    umap_model=umap_model,
)

# Fit the model on the tweets, reusing the precomputed embeddings
topics, probs = topic_model.fit_transform(tweet_list, embeddings=embeddings)

# Extracting Topics - Most Frequent Topics

In [119]:
# Summary table of the fitted topics (Topic, Count, Name, Representation, Representative_Docs)
bertopic_info = topic_model.get_topic_info()

In [120]:
bertopic_info.head(10)

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,2071,-1_the_to_january6thcommitteehearings_of,"[the, to, january6thcommitteehearings, of, in, rt, is, that, you, for]","[#January6thCommitteeHearings Trump's *own* campaign managers and lawyers were telling him that they could find no evidence of election fraud and he had lost. Yet majority of republicans still believe the election was stolen. Do you see the power of corrupt media? @FoxNews @GOP, All I could think of watching that footage of Pelosi and Schumer on the phone was that this would be exactly what it would be like if my grandparents ended up having to handle the insurrection. #January6thCommitteeHearings, How does it feel to know that YOUR PRESIDENT FAILED YOU ON JAN. 6th… “ I’ll be there with you “ \n\nNow, Trump has to appear before Congress.\n#January6thCommitteeHearings]"
1,0,207,0_subpoena_committee_subpoenaed_trump,"[subpoena, committee, subpoenaed, trump, voted, former, donald, to, president, the]","[RT @votevets: BREAKING: The @January6thCmte has voted to subpoena Donald John Trump. #January6thCommitteeHearings, RT @joekatz45: #BREAKING: the @January6thCmte just voted unanimously to subpoena former President Donald Trump.\n\n#January6thCommitteeHearin…, The January 6 committee issued former-president Donald Trump a subpoena regarding his involvement in the insurrection on the capital. \n\nDo you think he’ll show &amp; answer their questions? \n\n#January6thCommitteeHearings \n#January6thHearings #january6 #Subpoena #TrumpSubpoena]"
2,1,199,1_trumpcoupattempt_trump_day_trumpisgoingtojail,"[trumpcoupattempt, trump, day, trumpisgoingtojail, trumpisgoingtoprison, january6thcommitteehearings, rt, httpstcolwsvdseddq, donald, is]","[@TheDailyShow Let's just cut to the Chase \nand Indict #trump Now? 👍😁\nhttps://t.co/lwsVdSeDdQ\n\n#January6thCommitteeHearings #j6 #J6Hearings #IndictTrumpNow #TrumpCrimeFamily #TrumpIsGoingToJail #TrumpCoupAttempt #Trumpisgoingtoprison #SCOTUS https://t.co/Xc88vqdlI7, Who wants to cut to the Chase \nand Indict #trump Now? 👍😁\nhttps://t.co/lwsVdSeDdQ\n\n#January6thCommitteeHearings #j6 #J6Hearings #IndictTrumpNow #TrumpCrimeFamily #TrumpIsGoingToJail #TrumpCoupAttempt #Trumpisgoingtoprison #SCOTUS https://t.co/FVDDZNG2Bq, @mmpadellan Everyday is a bad day for #Trump 😁\nhttps://t.co/lwsVdSeDdQ\n\n#January6thCommitteeHearings #j6 #J6Hearings #IndictTrumpNow #TrumpCrimeFamily #TrumpIsGoingToJail #TrumpCoupAttempt #Trumpisgoingtoprison #SCOTUS #TrumpisaNationalDisgrace https://t.co/cZ0vFMTMx2]"
3,2,163,2_and_forthepeople_peopleoverpolitics_standwithukraine,"[and, forthepeople, peopleoverpolitics, standwithukraine, votebluein2022, republicanwaronseniors, lgbtqhistorymonth, socialsecurityisourmoney, inflationreductionact, roevember]","[@santiagomayer_ #Roevember and\n#ForThePeople and\n#VoteBlueIn2022 and \n#StandWithUkraine️ and\n#PeopleOverPolitics and\n#OathBreakerMAGA and\n#LGBTQHistoryMonth and\n#InflationReductionAct and\n#RepublicanWarOnSeniors and\n#SocialSecurityIsOurMoney and\n#January6thCommitteeHearings..., @fred_guttenberg #Roevember and\n#ForThePeople and\n#VoteBlueIn2022 and \n#StandWithUkraine️ and\n#PeopleOverPolitics and\n#OathBreakerMAGA and\n#LGBTQHistoryMonth and\n#InflationReductionAct and\n#RepublicanWarOnSeniors and\n#SocialSecurityIsOurMoney and\n#January6thCommitteeHearings..., @H_MitchellPhoto #Roevember and\n#ForThePeople and\n#VoteBlueIn2022 and \n#StandWithUkraine️ and\n#PeopleOverPolitics and\n#OathBreakerMAGA and\n#LGBTQHistoryMonth and\n#InflationReductionAct and\n#RepublicanWarOnSeniors and\n#SocialSecurityIsOurMoney and\n#January6thCommitteeHearings...]"
4,3,148,3_rt_january6thcommitteehearings_watching_httpstcofcaejdpxxi,"[rt, january6thcommitteehearings, watching, httpstcofcaejdpxxi, marshablackburn, stunning, you, january6thhearings, me, many]","[Next Role: @RepLizCheney I know the curtain came down on your performance yesterday but keep your chin up your next starring role is not far off. #MissPiggy #MuppetCheney #January6thCommitteeHearings #January6th https://t.co/7JAr6GSXQs, RT @David_Leavitt: Are you watching?\n\n#January6thCommitteeHearings, RT @sn00pdad: Next Role: @RepLizCheney I know the curtain came down on your performance yesterday but keep your chin up your next starring…]"
5,4,139,4_pelosi_nancy_speaker_badass,"[pelosi, nancy, speaker, badass, schumer, boss, she, her, phone, is]","[I'm a bigger fan of the speaker of the house Nancy Pelosi today more than I ever was #MorningJoe #January6thCommitteeHearings, On Jan 6 speaker Pelosi was President pelosi she was the ONLY one truly leading that day and being a true leader #January6thCommitteeHearings, Speaker Pelosi was THE president that day.\n\n#January6thCommitteeHearings]"
6,5,104,5_speakerpelosi_her_leadership_she,"[speakerpelosi, her, leadership, she, is, pressure, cool, boss, senschumer, january6thcommitteehearings]","[@therecount @TheRickWilson @SpeakerPelosi worked her ass off that day, kept her cool, to SAVE ALL OF US! Thank you! #January6thCommitteeHearings #CNN #democracy, Now we know @SpeakerPelosi was the #POTUS on #January6th #January6thCommitteeHearings #Jan, RT @janna_bastone: Watching @SpeakerPelosi in action on #January6th (knowing her age) and I just NEED to know what her vitamin regime is! I…]"
7,6,97,6_testify_he_will_testifying,"[testify, he, will, testifying, trump, oath, before, under, to, him]","[#January6thCommitteeHearings If Trump wants America to believe him,if he has nothing to hide, he should testify under oath. Thats it.....simple. put up or shut up., I want to see Trump testify LIVE!!! #January6thCommitteeHearings, Will Donald Trump testify for the #January6thCommitteeHearings #DonaldTrump]"
8,7,95,7_nancypelosi_nancy_her_like,"[nancypelosi, nancy, her, like, phone, she, speakerpelosi, hawleymo, democracy, ran]","[Hey, #NancyPelosi … it helps to turn your phone on..#LyingNancy #January6thCommitteeHearings is a sh*tshow. Fake video of @SpeakerPelosi calling for help. She set it all up, she didn’t want any help. #SwampRats #VoteRed https://t.co/Jn7Ao4uU0V, @NancyPelosi is just a mother F*CKING BADASS PERIODT!! She did not bust a sweat on January the 6th and took things into her own hands #Maddow #MSNBC #January6thCommitteeHearings #NancyPelosi https://t.co/2jWJxHrASS, #NancyPelosi for President. #January6thCommitteeHearings]"
9,8,94,8_secret_service_knew_agents,"[secret, service, knew, agents, the, were, capitol, fbi, they, security]","[RT @lakeinla: Why aren't they doing anything about the Secret Service? Why is the Secret Service above the law? They knew and they chose to…, RT @Cindy_Banyai_FL: Secret Service knew about the “crazies” and knew people were armed the morning of Jan 6 #January6thCommitteeHearings, The secret service was in on it. #January6thCommitteeHearings]"


#### BERTopic Get Document Info
Use as starting point to develop dataframe for intruder work

In [88]:
# Per-document table: each tweet alongside its assigned topic and that topic's metadata
bertopic_doc_info = topic_model.get_document_info(tweet_list)

In [99]:
bertopic_doc_info.head()

Unnamed: 0,Document,Topic,Name,Representation,Representative_Docs,Top_n_words,Probability,Representative_document
0,@ReallyAmerican1 #Roevember and\n#ForThePeople and\n#VoteBlueIn2022 and \n#StandWithUkraine️ and\n#PeopleOverPolitics and\n#OathBreakerMAGA and\n#LGBTQHistoryMonth and\n#InflationReductionAct and\n#RepublicanWarOnSeniors and\n#SocialSecurityIsOurMoney and\n#January6thCommitteeHearings...,2,2_and_forthepeople_peopleoverpolitics_standwithukraine,"[and, forthepeople, peopleoverpolitics, standwithukraine, votebluein2022, republicanwaronseniors, lgbtqhistorymonth, socialsecurityisourmoney, inflationreductionact, roevember]","[@santiagomayer_ #Roevember and\n#ForThePeople and\n#VoteBlueIn2022 and \n#StandWithUkraine️ and\n#PeopleOverPolitics and\n#OathBreakerMAGA and\n#LGBTQHistoryMonth and\n#InflationReductionAct and\n#RepublicanWarOnSeniors and\n#SocialSecurityIsOurMoney and\n#January6thCommitteeHearings..., @fred_guttenberg #Roevember and\n#ForThePeople and\n#VoteBlueIn2022 and \n#StandWithUkraine️ and\n#PeopleOverPolitics and\n#OathBreakerMAGA and\n#LGBTQHistoryMonth and\n#InflationReductionAct and\n#RepublicanWarOnSeniors and\n#SocialSecurityIsOurMoney and\n#January6thCommitteeHearings..., @H_MitchellPhoto #Roevember and\n#ForThePeople and\n#VoteBlueIn2022 and \n#StandWithUkraine️ and\n#PeopleOverPolitics and\n#OathBreakerMAGA and\n#LGBTQHistoryMonth and\n#InflationReductionAct and\n#RepublicanWarOnSeniors and\n#SocialSecurityIsOurMoney and\n#January6thCommitteeHearings...]",and - forthepeople - peopleoverpolitics - standwithukraine - votebluein2022 - republicanwaronseniors - lgbtqhistorymonth - socialsecurityisourmoney - inflationreductionact - roevember,0.568121,False
1,RT @sandibachom: IS THIS THING ON???!!This is pathetic. ACTING Sec of Defense Chris Miller was the ONLY person who can deploy the national…,62,62_guard_national_miller_deploy,"[guard, national, miller, deploy, chris, sec, defense, 14, acting, only]","[RT @sandibachom: IS THIS THING ON???!!This is pathetic. ACTING Sec of Defense Chris Miller was the ONLY person who can deploy the national…, RT @PatWay1979: ACTING Sec of Defense Chris Miller was the ONLY person who can deploy the national guard!!!! Chris Miller denied them 14 t…, ACTING Sec of Defense Chris Miller was the ONLY person who can deploy the national guard!!!! Chris Miller denied them 14 times !!!!!!! He was installed by trump on November 9 for this purpose. \n\n“BUT BUT BUT HUNTER BIDEN!!!\n\n#January6thCommitteeHearings https://t.co/DOxFDOd1k9]",guard - national - miller - deploy - chris - sec - defense - 14 - acting - only,1.0,True
2,RT @tleehumphrey: Today is the beginning of the inquiry into the Trudeau gov’s use of the Emergencies Act against the #FreedomConvoy so wha…,-1,-1_the_to_january6thcommitteehearings_of,"[the, to, january6thcommitteehearings, of, in, rt, is, that, you, for]","[#January6thCommitteeHearings Trump's *own* campaign managers and lawyers were telling him that they could find no evidence of election fraud and he had lost. Yet majority of republicans still believe the election was stolen. Do you see the power of corrupt media? @FoxNews @GOP, All I could think of watching that footage of Pelosi and Schumer on the phone was that this would be exactly what it would be like if my grandparents ended up having to handle the insurrection. #January6thCommitteeHearings, How does it feel to know that YOUR PRESIDENT FAILED YOU ON JAN. 6th… “ I’ll be there with you “ \n\nNow, Trump has to appear before Congress.\n#January6thCommitteeHearings]",the - to - january6thcommitteehearings - of - in - rt - is - that - you - for,0.606865,False
3,RT @AdamKinzinger: Mitch McConnell.\nKevin McCarthy.\nThey both knew Trump was responsible. \nThey both called him out.\nThey both backed down.…,-1,-1_the_to_january6thcommitteehearings_of,"[the, to, january6thcommitteehearings, of, in, rt, is, that, you, for]","[#January6thCommitteeHearings Trump's *own* campaign managers and lawyers were telling him that they could find no evidence of election fraud and he had lost. Yet majority of republicans still believe the election was stolen. Do you see the power of corrupt media? @FoxNews @GOP, All I could think of watching that footage of Pelosi and Schumer on the phone was that this would be exactly what it would be like if my grandparents ended up having to handle the insurrection. #January6thCommitteeHearings, How does it feel to know that YOUR PRESIDENT FAILED YOU ON JAN. 6th… “ I’ll be there with you “ \n\nNow, Trump has to appear before Congress.\n#January6thCommitteeHearings]",the - to - january6thcommitteehearings - of - in - rt - is - that - you - for,0.729519,False
4,RT @libradunn: That shit was heavy. ✌🏽 y'all take care 🖤\n\n#January6thCommitteeHearings,3,3_rt_january6thcommitteehearings_watching_httpstcofcaejdpxxi,"[rt, january6thcommitteehearings, watching, httpstcofcaejdpxxi, marshablackburn, stunning, you, january6thhearings, me, many]","[Next Role: @RepLizCheney I know the curtain came down on your performance yesterday but keep your chin up your next starring role is not far off. #MissPiggy #MuppetCheney #January6thCommitteeHearings #January6th https://t.co/7JAr6GSXQs, RT @David_Leavitt: Are you watching?\n\n#January6thCommitteeHearings, RT @sn00pdad: Next Role: @RepLizCheney I know the curtain came down on your performance yesterday but keep your chin up your next starring…]",rt - january6thcommitteehearings - watching - httpstcofcaejdpxxi - marshablackburn - stunning - you - january6thhearings - me - many,0.219042,False


In [90]:
# Global pandas display setting: do not truncate cell contents, so whole tweets are readable
pd.set_option('display.max_colwidth', None)

#### Eyeball content in top two topics

In [91]:
# Rows of the document table that were assigned to topic 0
topic0 = bertopic_doc_info.loc[bertopic_doc_info['Topic'].eq(0)]

In [92]:
# Keep only the identifying columns. Reassigning the result (instead of inplace=True)
# avoids mutating a filtered slice of bertopic_doc_info, which can trigger
# pandas' SettingWithCopyWarning.
topic0 = topic0.drop(columns=['Representation', 'Representative_Docs', 'Top_n_words', 'Probability', 'Representative_document'])

In [93]:
topic0.head()

Unnamed: 0,Document,Topic,Name
43,RT @AdamKinzinger: We just voted unanimously to subpoena Donald Trump to testify UNDER OATH.\nDemocracy demands it.\nHistory deserves it.\n#Ja…,0,0_subpoena_committee_subpoenaed_trump
54,"@Cindy_Banyai_FL #SUPPORTELECT @Cindy_Banyai_FL\n TO HELP US House candidate,FL-19\n#VOTEFORPROTECTYOURRIGHTS\n\n @Cindy_Banyai_FL\nThe committee voted to subpoena Trump for his role in Jan 6 and his attempt to overthrow the US Election #January6thCommitteeHearings https://t.co/7kPbOwJN5Q",0,0_subpoena_committee_subpoenaed_trump
62,RT @TheYoungTurks: Coming up on #tytlive:\n\n#Trump skirts testimony question in hostile 14-page #January6thCommitteeHearings response. \n\nSto…,0,0_subpoena_committee_subpoenaed_trump
75,The January 6 committee issued former-president Donald Trump a subpoena regarding his involvement in the insurrection on the capital. \n\nDo you think he’ll show &amp; answer their questions? \n\n#January6thCommitteeHearings \n#January6thHearings #january6 #Subpoena #TrumpSubpoena,0,0_subpoena_committee_subpoenaed_trump
90,Coming up on #tytlive:\n\n#Trump skirts testimony question in hostile 14-page #January6thCommitteeHearings response. \n\nStory by: @thehill\n\nhttps://t.co/8SBoKnnSOU,0,0_subpoena_committee_subpoenaed_trump


In [94]:
# Rows of the document table that were assigned to topic 1
topic1 = bertopic_doc_info.loc[bertopic_doc_info['Topic'].eq(1)]

In [95]:
# Keep only the identifying columns. Reassigning the result (instead of inplace=True)
# avoids mutating a filtered slice of bertopic_doc_info, which can trigger
# pandas' SettingWithCopyWarning.
topic1 = topic1.drop(columns=['Representation', 'Representative_Docs', 'Top_n_words', 'Probability', 'Representative_document'])

In [100]:
topic1.head()

Unnamed: 0,Document,Topic,Name
22,@newsmax This shows (Trump) he's in Trouble #January6thCommitteeHearings #TrumpRussianGoon #KeepTrying #MyOpinion,1,1_trumpcoupattempt_trump_day_trumpisgoingtojail
30,RT @_Peace_Love_US: Do you think Trump is going to No Show for the #January6thCommitteeHearings ? \n\n#FreshResists https://t.co/1emFKXClXZ,1,1_trumpcoupattempt_trump_day_trumpisgoingtojail
40,"#January6thCommitteeHearings #Jan6Hearings trump #trump Liz Cheney\n\n#14.\n\nthat irate orate---you sure have the style\nbut ""stuff's"" piling up on you---look back at the pile\nroy cohn &amp; your dad started an avalanche of wrong\nthe longest loser of all time---what a pathetic prolong. https://t.co/csCNYIR9f6",1,1_trumpcoupattempt_trump_day_trumpisgoingtojail
42,RT @StandForBetter: This says it all.\n\n#TrumpCoupAttempt\n#January6thCommitteeHearings https://t.co/95KjEX8fAF,1,1_trumpcoupattempt_trump_day_trumpisgoingtojail
166,RT @MKerklin: Hope Trump was wearing his brown pants today. #January6thCommitteeHearings,1,1_trumpcoupattempt_trump_day_trumpisgoingtojail


# Create Intruder Batches 
Use these top topics:
- 0_subpoena_committee_subpoenaed_trump
- 1_trumpcoupattempt_trump_day_trumpisgoingtojail
- 2_and_forthepeople_peopleoverpolitics_standwithukraine
- 3_rt_january6thcommitteehearings_watching_httpstcofcaejdpxxi

In [97]:
# Independent working copy of the document table, used as input to the batch function
# once the unnecessary columns have been stripped from it
bertopic_doc_info2 = bertopic_doc_info.copy(deep=True)

In [98]:
# Strip the topic-metadata columns from the working copy
bertopic_doc_info2 = bertopic_doc_info2.drop(
    columns=['Representation', 'Representative_Docs', 'Top_n_words', 'Probability', 'Representative_document']
)

In [102]:
bertopic_doc_info2.head()

Unnamed: 0,Document,Topic,Name
0,@ReallyAmerican1 #Roevember and\n#ForThePeople and\n#VoteBlueIn2022 and \n#StandWithUkraine️ and\n#PeopleOverPolitics and\n#OathBreakerMAGA and\n#LGBTQHistoryMonth and\n#InflationReductionAct and\n#RepublicanWarOnSeniors and\n#SocialSecurityIsOurMoney and\n#January6thCommitteeHearings...,2,2_and_forthepeople_peopleoverpolitics_standwithukraine
1,RT @sandibachom: IS THIS THING ON???!!This is pathetic. ACTING Sec of Defense Chris Miller was the ONLY person who can deploy the national…,62,62_guard_national_miller_deploy
2,RT @tleehumphrey: Today is the beginning of the inquiry into the Trudeau gov’s use of the Emergencies Act against the #FreedomConvoy so wha…,-1,-1_the_to_january6thcommitteehearings_of
3,RT @AdamKinzinger: Mitch McConnell.\nKevin McCarthy.\nThey both knew Trump was responsible. \nThey both called him out.\nThey both backed down.…,-1,-1_the_to_january6thcommitteehearings_of
4,RT @libradunn: That shit was heavy. ✌🏽 y'all take care 🖤\n\n#January6thCommitteeHearings,3,3_rt_january6thcommitteehearings_watching_httpstcofcaejdpxxi


In [128]:
def create_batch(df, main_topic, specific_topic, n_main=4, n_intruders=1, random_state=None):
    """Build one shuffled intruder-detection batch from a document table.

    The batch contains ``n_main`` documents sampled from ``main_topic`` (marked
    ``intruder = 0``) plus ``n_intruders`` documents sampled from
    ``specific_topic`` (marked ``intruder = 1``), in random order.

    Parameters
    ----------
    df : pandas.DataFrame
        Document table with a ``'Name'`` column holding each row's topic name.
    main_topic : str
        Value of ``'Name'`` that the non-intruder documents are drawn from.
    specific_topic : str
        Value of ``'Name'`` that the intruder documents are drawn from.
    n_main : int, default 4
        Number of documents to draw from ``main_topic``.
    n_intruders : int, default 1
        Number of documents to draw from ``specific_topic``.
    random_state : int or None, default None
        Seed for reproducible sampling and shuffling. With ``None`` the
        function samples with pandas' default randomness and shuffles with the
        module-level ``random`` generator, as before.

    Returns
    -------
    list of dict
        One record per selected row (the row's columns plus ``'intruder'``).

    Raises
    ------
    ValueError
        If either topic is absent from ``df['Name']``, if both topics are the
        same, or if a topic has fewer rows than requested.
    """
    topic_names = set(df['Name'].unique())

    if main_topic not in topic_names:
        raise ValueError(f"Main topic '{main_topic}' not found in the DataFrame.")
    # Fail with a clear message instead of an opaque pandas sampling error.
    if specific_topic not in topic_names:
        raise ValueError(f"Intruder topic '{specific_topic}' not found in the DataFrame.")
    # An "intruder" drawn from the main topic would make the batch meaningless.
    if specific_topic == main_topic:
        raise ValueError("Intruder topic must be different from the main topic.")

    main_docs = df[df['Name'] == main_topic]
    intruder_docs = df[df['Name'] == specific_topic]

    if len(main_docs) < n_main:
        raise ValueError(
            f"Main topic '{main_topic}' has only {len(main_docs)} documents; {n_main} requested."
        )
    if len(intruder_docs) < n_intruders:
        raise ValueError(
            f"Intruder topic '{specific_topic}' has only {len(intruder_docs)} documents; "
            f"{n_intruders} requested."
        )

    if random_state is None:
        # Unseeded: keep using the shared module-level generator for the shuffle.
        rng = random
        main_seed = intruder_seed = None
    else:
        # Derive independent seeds for the two pandas samples from one generator,
        # and reuse that generator for the final shuffle.
        rng = random.Random(random_state)
        main_seed = rng.randrange(2 ** 32)
        intruder_seed = rng.randrange(2 ** 32)

    # Documents from the main topic are marked as non-intruders
    batch = main_docs.sample(n=n_main, random_state=main_seed).to_dict('records')
    for doc in batch:
        doc['intruder'] = 0

    # Documents from the other topic are marked as intruders
    intruders = intruder_docs.sample(n=n_intruders, random_state=intruder_seed).to_dict('records')
    for intruder in intruders:
        intruder['intruder'] = 1

    batch.extend(intruders)

    # Shuffle so the intruder's position in the batch is random
    rng.shuffle(batch)

    return batch

In [133]:
# Topic 0 batch with an intruder from topic 1: the list of record dicts returned by
# create_batch is turned into a DataFrame in a single expression
topic0_batch1 = pd.DataFrame.from_records(
    create_batch(
        bertopic_doc_info2,
        '0_subpoena_committee_subpoenaed_trump',
        '1_trumpcoupattempt_trump_day_trumpisgoingtojail',
    )
)

In [134]:
topic0_batch1

Unnamed: 0,Document,Topic,Name,intruder
0,I guess the government finally gave Trump an official document he CAN keep!\n#Subpoena #January6thCommitteeHearings,0,0_subpoena_committee_subpoenaed_trump,0
1,Trump responds to Jan. 6 committee subpoena vote [READ FULL MEMO]\nhttps://t.co/slB55c6ydD\n#January6thCommitteeHearings \n#CapitolRiot \n@WBAP247NEWS,0,0_subpoena_committee_subpoenaed_trump,0
2,"RT @PithyTruth: @JonathanTurley BEING A TRUMP APOLOGIST, you criticize subpoena for him being issued today, but say nothing about #January6…",0,0_subpoena_committee_subpoenaed_trump,0
3,"Jan. 6 panel subpoenas Trump, shows startling new video\n#January6thCommitteeHearings\n#TrumpCoupAttempt\n#TrumpIsGoingToJail \nhttps://t.co/dcO5CMjPcH",0,0_subpoena_committee_subpoenaed_trump,0
4,"#HATH #January6thCommitteeHearings \n#Pennsylvania 🚨 #Mastriano @ 2.20.21\n""Donald Trump isn't done. Having been w/ him quite a bit. He's the real deal. Rebbie prayed w/him last month. We need a breakthrough on the anniversary of Esther's big moment""\nCC @andre_gagne1 @jennycohn1 https://t.co/euJndumK3K",1,1_trumpcoupattempt_trump_day_trumpisgoingtojail,1


In [137]:
# Topic 0 batch with an intruder from topic 2: the list of record dicts returned by
# create_batch is turned into a DataFrame in a single expression
topic0_batch2 = pd.DataFrame.from_records(
    create_batch(
        bertopic_doc_info2,
        '0_subpoena_committee_subpoenaed_trump',
        '2_and_forthepeople_peopleoverpolitics_standwithukraine',
    )
)

In [138]:
topic0_batch2

Unnamed: 0,Document,Topic,Name,intruder
0,RT @winnie_LaLisa: Donald John Trump was unanimously voted to receive a subpoena by the Jan 6 committee. The MAGA Republicans are fuming #J…,0,0_subpoena_committee_subpoenaed_trump,0
1,RT @allegedlyangelo: THEY ARE ACTUALLY SUBPOENAING TRUMP AND HE HAS TO TESTIFY #January6thCommitteeHearings https://t.co/mrcTX4gFxx,0,0_subpoena_committee_subpoenaed_trump,0
2,"While it’s nice to see the Committee subpoena Trump, ultimately, nothing will come from it. He literally stole a foreign nation’s nuclear specs and nothing happened. An insurrection sure as hell doesn’t matter. Nothing will happen, sadly.\n\n#January6thCommitteeHearings",0,0_subpoena_committee_subpoenaed_trump,0
3,"On the same day treasonous Donald Trump was subpoenaed during the #January6thCommitteeHearings, the U.S. Supreme Court refused to intervene in the Mar-a-Lago classified documents case against the criminal former president. \n\nhttps://t.co/orDbhOSkte",0,0_subpoena_committee_subpoenaed_trump,0
4,@duty2warn #Roevember and\n#ForThePeople and\n#VoteBlueIn2022 and \n#StandWithUkraine️ and\n#PeopleOverPolitics and\n#OathBreakerMAGA and\n#LGBTQHistoryMonth and\n#InflationReductionAct and\n#RepublicanWarOnSeniors and\n#SocialSecurityIsOurMoney and\n#January6thCommitteeHearings...,2,2_and_forthepeople_peopleoverpolitics_standwithukraine,1


In [139]:
# Topic 0 batch with an intruder from topic 3: the list of record dicts returned by
# create_batch is turned into a DataFrame in a single expression
topic0_batch3 = pd.DataFrame.from_records(
    create_batch(
        bertopic_doc_info2,
        '0_subpoena_committee_subpoenaed_trump',
        '3_rt_january6thcommitteehearings_watching_httpstcofcaejdpxxi',
    )
)

In [140]:
topic0_batch3

Unnamed: 0,Document,Topic,Name,intruder
0,The Committee is offering him an opportunity to answer questions ... an innocent man would jump at the chance to respond to what's been disclosed. #FactsMatter #January6thCommitteeHearings https://t.co/GbZwhixoVH,0,0_subpoena_committee_subpoenaed_trump,0
1,Trump to be subpoenaed by Jan. 6 panel as the ‘central cause’ of Capitol #insurrection https://t.co/Wz2kvevmyK via @ncpolicywatch #ncpol #nc #January6thCommitteeHearings #Trump,0,0_subpoena_committee_subpoenaed_trump,0
2,RT @GrahamAllen_1: #january6thcommitteehearings are a WITCH HUNT.,3,3_rt_january6thcommitteehearings_watching_httpstcofcaejdpxxi,1
3,"Subpoena = a small blip in the road for someone like Donald Trump \n\nTrump will not give a rat’s ass about being issued with a subpoena, and nothing will happen to him. If subpoenas are now only performative, then this country is really doomed.\n\n#January6thCommitteeHearings",0,0_subpoena_committee_subpoenaed_trump,0
4,Subpoena gives Trump the chance to Clear the record. #January6thCommitteeHearings He won't show because he'd perjure himself. Hey Junior @DonaldJTrumpJr your thoughts? @January6thCmte,0,0_subpoena_committee_subpoenaed_trump,0


In [141]:
# Topic 1 batch with an intruder from topic 0: the list of record dicts returned by
# create_batch is turned into a DataFrame in a single expression
topic1_batch1 = pd.DataFrame.from_records(
    create_batch(
        bertopic_doc_info2,
        '1_trumpcoupattempt_trump_day_trumpisgoingtojail',
        '0_subpoena_committee_subpoenaed_trump',
    )
)

In [142]:
topic1_batch1

Unnamed: 0,Document,Topic,Name,intruder
0,@RonFilipkowski No he ran to p- —y named Donald J Trump. Pretty soon his fist will be hanging from a prison cell. #January6thCommitteeHearings #January6thHearings https://t.co/NcTyRGpcOI,1,1_trumpcoupattempt_trump_day_trumpisgoingtojail,0
1,RT @mari45678910: Today was Donald Trump's no good very bad day.\n#LastWord \n#TrumpMeltdown \n#January6thCommitteeHearings https://t.co/MwKxz…,1,1_trumpcoupattempt_trump_day_trumpisgoingtojail,0
2,@RNCLatinos When #TrumpCrimeFamily was refusing to leave office #MAGAterrorist did this. #January6thCommitteeHearings #TrumpStoleTheDocs #TrumpIsGoingToJail https://t.co/eEzEUfFZmI,1,1_trumpcoupattempt_trump_day_trumpisgoingtojail,0
3,BREAKING!!! BIG Trumps response Must Read\n\n#January6thCommitteeHearings #trump\n\nhttps://t.co/LlpSfWvjiA,1,1_trumpcoupattempt_trump_day_trumpisgoingtojail,0
4,"Average person. Today’s #January6thCommitteeHearings made me write this post. Now, if you’re a President or former President. You can say f**k that subpoena because unlike everyday citizens/people, it appears that you’re above the law. Hmm #PoliticsOnMyMind",0,0_subpoena_committee_subpoenaed_trump,1


In [143]:
# Topic 1 batch with an intruder from topic 2: the list of record dicts returned by
# create_batch is turned into a DataFrame in a single expression
topic1_batch2 = pd.DataFrame.from_records(
    create_batch(
        bertopic_doc_info2,
        '1_trumpcoupattempt_trump_day_trumpisgoingtojail',
        '2_and_forthepeople_peopleoverpolitics_standwithukraine',
    )
)

In [144]:
topic1_batch2

Unnamed: 0,Document,Topic,Name,intruder
0,Treat Trump like the Rosenbergs #January6thCommitteeHearings,1,1_trumpcoupattempt_trump_day_trumpisgoingtojail,0
1,RT @TrivWorks: “I’ve never even met Donald Trump…” #January6thCommitteeHearings https://t.co/7ReV1C7FBL,1,1_trumpcoupattempt_trump_day_trumpisgoingtojail,0
2,@cnnbrk @CNNPolitics @AlisynCamerota I wish these types of moments can transcend the divide in this country. #January6thCommitteeHearings,2,2_and_forthepeople_peopleoverpolitics_standwithukraine,1
3,RT @evannejordan: @martin_kj @ruthiedavis Any bad day for trump is a GREAT DAY FOR DEMOCRACY! #TrumpCoupAttempt ##January6thCommitteeHearin…,1,1_trumpcoupattempt_trump_day_trumpisgoingtojail,0
4,@TimRunsHisMouth How can you possibly run your mouth with #Trump 's dick in it at all times? #magaloid #January6thCommitteeHearings #wottabitch,1,1_trumpcoupattempt_trump_day_trumpisgoingtojail,0


In [145]:
# Topic 1 batch with an intruder from topic 3: the list of record dicts returned by
# create_batch is turned into a DataFrame in a single expression
topic1_batch3 = pd.DataFrame.from_records(
    create_batch(
        bertopic_doc_info2,
        '1_trumpcoupattempt_trump_day_trumpisgoingtojail',
        '3_rt_january6thcommitteehearings_watching_httpstcofcaejdpxxi',
    )
)

In [146]:
topic1_batch3

Unnamed: 0,Document,Topic,Name,intruder
0,"RT @la_natif: @AnnTelnaes 👑 Ann ...\nYou underrated your magnificent creation. \n\nExpanded with the roll call glossary of ""enablers and insti…",3,3_rt_january6thcommitteehearings_watching_httpstcofcaejdpxxi,1
1,Where will Trump finally end up\n#January6thCommitteeHearings,1,1_trumpcoupattempt_trump_day_trumpisgoingtojail,0
2,BREAKING!!! BIG Trumps response Must Read\n\n#January6thCommitteeHearings #trump\n\nhttps://t.co/LlpSfWvjiA,1,1_trumpcoupattempt_trump_day_trumpisgoingtojail,0
3,A bad day for Trump is a good day for liberty in the United States. #January6thCommitteeHearings,1,1_trumpcoupattempt_trump_day_trumpisgoingtojail,0
4,"Yeah, you'll get him this time, leftys. 🤣🤣🤣🤣🤣🤣\n#January6thCommitteeHearings\n\n#TrumpIsGoingToJail https://t.co/oo5lsLO41E",1,1_trumpcoupattempt_trump_day_trumpisgoingtojail,0


In [147]:
# Topic 2 batch with an intruder from topic 0: the list of record dicts returned by
# create_batch is turned into a DataFrame in a single expression
topic2_batch1 = pd.DataFrame.from_records(
    create_batch(
        bertopic_doc_info2,
        '2_and_forthepeople_peopleoverpolitics_standwithukraine',
        '0_subpoena_committee_subpoenaed_trump',
    )
)

In [148]:
topic2_batch1

Unnamed: 0,Document,Topic,Name,intruder
0,"@RockwaterReport @yaya_gumbo @sarmentovince2 @10903 @club_ed8 @JoslinSam @Hamletgarcia17 @NewaiGreen @klj998866 @fedmech @Tillman_40 @garyelam23 @belladonnalla @donna__sloan @DHarryMcGee1 @dtannie @zuzsmama2 @DailyCoffee533 @SorinsSomnium @Holly2360 @Torrid_Zone @Liberty_Belle04 @Zuzusmama @_SilentMaj0rity @JennyDupuis5 @MichaelDBlush1 @dollahollayo @Kybelle195739 @KatTheWonk @cspanwj @boringfileclerk @DanaScu64550786 @writergary @cspan If @cspan @cspanwj #WJAM really wanted to avoid manifest destiny #VoteOutMoreDems #Election2022 they should have condemned Dems #January6thCommitteeHearings as not only unethical, immoral &amp; an unconstitutional violation of Separation of Powers👇\nhttps://t.co/u0aJQpu88A",2,2_and_forthepeople_peopleoverpolitics_standwithukraine,0
1,"“January 6th Committee votes to subpoena Former President of the United States, Donald J. Trump.”\n\nI voted for this oversight and enforcement of the law. \n\nLike this tweet if you did too. \n\n#January6thCommitteeHearings",0,0_subpoena_committee_subpoenaed_trump,1
2,"It truly breaks my heart to see @SpeakerPelosi @SenSchumer sticking together like two genuine, loving, caring grandparents 😢 @TheDemocrats in majority truly are a caring party of people. #January6thCommitteeHearings",2,2_and_forthepeople_peopleoverpolitics_standwithukraine,0
3,RT @Capp97403345: @ashley_ehasz #Roevember and\n#ForThePeople and\n#VoteBlueIn2022 and \n#StandWithUkraine️ and\n#PeopleOverPolitics and\n#OathB…,2,2_and_forthepeople_peopleoverpolitics_standwithukraine,0
4,@parkerpbutler #Roevember and\n#ForThePeople and\n#VoteBlueIn2022 and \n#StandWithUkraine️ and\n#PeopleOverPolitics and\n#OathBreakerMAGA and\n#LGBTQHistoryMonth and\n#InflationReductionAct and\n#RepublicanWarOnSeniors and\n#SocialSecurityIsOurMoney and\n#January6thCommitteeHearings...,2,2_and_forthepeople_peopleoverpolitics_standwithukraine,0


In [149]:
# Topic 2 batch with an intruder from topic 1: the list of record dicts returned by
# create_batch is turned into a DataFrame in a single expression
topic2_batch2 = pd.DataFrame.from_records(
    create_batch(
        bertopic_doc_info2,
        '2_and_forthepeople_peopleoverpolitics_standwithukraine',
        '1_trumpcoupattempt_trump_day_trumpisgoingtojail',
    )
)

In [150]:
topic2_batch2

Unnamed: 0,Document,Topic,Name,intruder
0,@EverettStern1 #Roevember and\n#ForThePeople and\n#VoteBlueIn2022 and \n#StandWithUkraine️ and\n#PeopleOverPolitics and\n#OathBreakerMAGA and\n#LGBTQHistoryMonth and\n#InflationReductionAct and\n#RepublicanWarOnSeniors and\n#SocialSecurityIsOurMoney and\n#January6thCommitteeHearings...,2,2_and_forthepeople_peopleoverpolitics_standwithukraine,0
1,@michael_muscato #Roevember and\n#ForThePeople and\n#VoteBlueIn2022 and \n#StandWithUkraine️ and\n#PeopleOverPolitics and\n#OathBreakerMAGA and\n#LGBTQHistoryMonth and\n#InflationReductionAct and\n#RepublicanWarOnSeniors and\n#SocialSecurityIsOurMoney and\n#January6thCommitteeHearings...,2,2_and_forthepeople_peopleoverpolitics_standwithukraine,0
2,"Trump thought re-arranging boxes at his mar-or-let-me-go Estate, might confuse, or his trail to election victory was definite. No one was capable of distracting the #January6thCommitteeHearings. Their work was conducted admirably. Real Patriots never supported Trump…NEVER! https://t.co/SwkEgcrmoH",1,1_trumpcoupattempt_trump_day_trumpisgoingtojail,1
3,@politvidchannel #Roevember and\n#ForThePeople and\n#VoteBlueIn2022 and \n#StandWithUkraine️ and\n#PeopleOverPolitics and\n#OathBreakerMAGA and\n#LGBTQHistoryMonth and\n#InflationReductionAct and\n#RepublicanWarOnSeniors and\n#SocialSecurityIsOurMoney and\n#January6thCommitteeHearings...,2,2_and_forthepeople_peopleoverpolitics_standwithukraine,0
4,@madisonhornok #Roevember and\n#ForThePeople and\n#VoteBlueIn2022 and \n#StandWithUkraine️ and\n#PeopleOverPolitics and\n#OathBreakerMAGA and\n#LGBTQHistoryMonth and\n#InflationReductionAct and\n#RepublicanWarOnSeniors and\n#SocialSecurityIsOurMoney and\n#January6thCommitteeHearings...,2,2_and_forthepeople_peopleoverpolitics_standwithukraine,0


In [153]:
# Topic 2 batch with an intruder from topic 3: the list of record dicts returned by
# create_batch is turned into a DataFrame in a single expression
topic2_batch3 = pd.DataFrame.from_records(
    create_batch(
        bertopic_doc_info2,
        '2_and_forthepeople_peopleoverpolitics_standwithukraine',
        '3_rt_january6thcommitteehearings_watching_httpstcofcaejdpxxi',
    )
)

In [154]:
# Display the topic-2 batch; the row drawn from the intruder topic is the one with intruder == 1
topic2_batch3

Unnamed: 0,Document,Topic,Name,intruder
0,"RT @EandPCartoons: Graeme MacKay, The Hamilton Spectator #January6thCommitteeHearings https://t.co/iceI65a57u",3,3_rt_january6thcommitteehearings_watching_httpstcofcaejdpxxi,1
1,@fred_guttenberg #Roevember and\n#ForThePeople and\n#VoteBlueIn2022 and \n#StandWithUkraine️ and\n#PeopleOverPolitics and\n#OathBreakerMAGA and\n#LGBTQHistoryMonth and\n#InflationReductionAct and\n#RepublicanWarOnSeniors and\n#SocialSecurityIsOurMoney and\n#January6thCommitteeHearings...,2,2_and_forthepeople_peopleoverpolitics_standwithukraine,0
2,@GayleGirl5 @donwinslow #Roevember and\n#ForThePeople and\n#VoteBlueIn2022 and \n#StandWithUkraine️ and\n#PeopleOverPolitics and\n#OathBreakerMAGA and\n#LGBTQHistoryMonth and\n#InflationReductionAct and\n#RepublicanWarOnSeniors and\n#SocialSecurityIsOurMoney and\n#January6thCommitteeHearings...,2,2_and_forthepeople_peopleoverpolitics_standwithukraine,0
3,@BidensWins 🤚\n#Roevember and\n#ForThePeople and\n#VoteBlueIn2022 and \n#StandWithUkraine️ and\n#PeopleOverPolitics and\n#OathBreakerMAGA and\n#LGBTQHistoryMonth and\n#InflationReductionAct and\n#RepublicanWarOnSeniors and\n#SocialSecurityIsOurMoney and\n#January6thCommitteeHearings...,2,2_and_forthepeople_peopleoverpolitics_standwithukraine,0
4,@EvanMcMullin #Roevember and\n#ForThePeople and\n#VoteBlueIn2022 and \n#StandWithUkraine️ and\n#PeopleOverPolitics and\n#OathBreakerMAGA and\n#LGBTQHistoryMonth and\n#InflationReductionAct and\n#RepublicanWarOnSeniors and\n#SocialSecurityIsOurMoney and\n#January6thCommitteeHearings...,2,2_and_forthepeople_peopleoverpolitics_standwithukraine,0


In [155]:
# Build the batch for topic 3 with topic 0 passed as the second topic name, and load
# the list of dicts returned by create_batch straight into a DataFrame
topic3_batch1 = pd.DataFrame.from_records(
    create_batch(
        bertopic_doc_info2,
        '3_rt_january6thcommitteehearings_watching_httpstcofcaejdpxxi',
        '0_subpoena_committee_subpoenaed_trump',
    )
)

In [156]:
# Display the topic-3 batch; the row drawn from the intruder topic is the one with intruder == 1
topic3_batch1

Unnamed: 0,Document,Topic,Name,intruder
0,"Can the #January6thCommitteeHearings subpoena of 🎃 be enforced? Yes. \n\n""An 1857 law says failure to comply with a congressional subpoena for testimony or documents is punishable by one to 12 months imprisonment.""\n\nhttps://t.co/wQkaecdvkr",0,0_subpoena_committee_subpoenaed_trump,1
1,RT @gretchen0912: they’re pissed\n#January6thCommitteeHearings\n#January6thHearings https://t.co/2AFKg2ZMVs,3,3_rt_january6thcommitteehearings_watching_httpstcofcaejdpxxi,0
2,@WillMeyerPA @leahcohenPOTB @Andiejoan Getting smashed by those who are saying I am anti because I had my students watching or some listening to #January6thCommitteeHearings today as an educator they need to hear and learn,3,3_rt_january6thcommitteehearings_watching_httpstcofcaejdpxxi,0
3,RT @LorenRuizLopez1: @GOPChairwoman Wonder how many times your name is gonna come up on #January6thCommitteeHearings ?!!🤔😁,3,3_rt_january6thcommitteehearings_watching_httpstcofcaejdpxxi,0
4,"RT @Lauriedarln: Worth a look, if you missed it before\n#USSS \n#January6thCommitteeHearings",3,3_rt_january6thcommitteehearings_watching_httpstcofcaejdpxxi,0


In [157]:
# Build the batch for topic 3 with topic 1 passed as the second topic name, and load
# the list of dicts returned by create_batch straight into a DataFrame
topic3_batch2 = pd.DataFrame.from_records(
    create_batch(
        bertopic_doc_info2,
        '3_rt_january6thcommitteehearings_watching_httpstcofcaejdpxxi',
        '1_trumpcoupattempt_trump_day_trumpisgoingtojail',
    )
)

In [158]:
# Display the topic-3 batch; the row drawn from the intruder topic is the one with intruder == 1
topic3_batch2

Unnamed: 0,Document,Topic,Name,intruder
0,@RonnyJacksonTX Tick Tock👇🏼🖕🏼\n#January6thCommitteeHearings https://t.co/fPbjQ2D5gM,3,3_rt_january6thcommitteehearings_watching_httpstcofcaejdpxxi,0
1,"Yeah, you'll get him this time, leftys. 🤣🤣🤣🤣🤣🤣\n#January6thCommitteeHearings\n\n#TrumpIsGoingToJail https://t.co/oo5lsLO41E",1,1_trumpcoupattempt_trump_day_trumpisgoingtojail,1
2,@TheRickWilson @SteveSchmidtSES @FPWellman watching #thelincolnproject on @Showtime hits different after today’s #January6thCommitteeHearings,3,3_rt_january6thcommitteehearings_watching_httpstcofcaejdpxxi,0
3,RT @lgfc2: #January6thCommitteeHearings #Jan6thHearings #jan6hearing #jan6justice NM found it on YouTube If you r like me and missed it…,3,3_rt_january6thcommitteehearings_watching_httpstcofcaejdpxxi,0
4,@WillMeyerPA @leahcohenPOTB @Andiejoan Getting smashed by those who are saying I am anti because I had my students watching or some listening to #January6thCommitteeHearings today as an educator they need to hear and learn,3,3_rt_january6thcommitteehearings_watching_httpstcofcaejdpxxi,0


In [159]:
# Build the batch for topic 3 with topic 2 passed as the second topic name, and load
# the list of dicts returned by create_batch straight into a DataFrame
topic3_batch3 = pd.DataFrame.from_records(
    create_batch(
        bertopic_doc_info2,
        '3_rt_january6thcommitteehearings_watching_httpstcofcaejdpxxi',
        '2_and_forthepeople_peopleoverpolitics_standwithukraine',
    )
)

In [160]:
# Display the topic-3 batch; the row drawn from the intruder topic is the one with intruder == 1
topic3_batch3

Unnamed: 0,Document,Topic,Name,intruder
0,RT @DFAaction: Join us. #January6thCommitteeHearings \n\nhttps://t.co/NtVl55ZVoM https://t.co/wr4y6QP34u,3,3_rt_january6thcommitteehearings_watching_httpstcofcaejdpxxi,0
1,RT @DFAaction: Join us. #January6thCommitteeHearings \n\nhttps://t.co/NtVl55ZVoM,3,3_rt_january6thcommitteehearings_watching_httpstcofcaejdpxxi,0
2,"RT @joekatz45: Boy, when you put this all together, it sounds pretty bad, huh.\n\n#January6thCommitteeHearings",3,3_rt_january6thcommitteehearings_watching_httpstcofcaejdpxxi,0
3,"Sage advice about life, business (&amp; politics), from the #MCU.\n\n#January6thCommitteeHearings #TrumpSubpoena #TwitterPurge\n#MidtermElections #ElonTwitter\n#CancelCulture #DoubleStandards\n#propoganda #LiberalHypocrisy\n#censorship #PayPal #PaypalGoWokeGoBroke\n\nhttps://t.co/xEbpGEyxLe",2,2_and_forthepeople_peopleoverpolitics_standwithukraine,1
4,RT @DworkinReport: Have you listened to this interview yet?\n#January6thCommitteeHearings \n\nhttps://t.co/DEoqJjTe7B,3,3_rt_january6thcommitteehearings_watching_httpstcofcaejdpxxi,0


#### All Batches into Dataframe

In [182]:
# All twelve batch DataFrames, listed in topic order (three batches per topic),
# ready to be concatenated into a single frame
frames = [
    topic0_batch1, topic0_batch2, topic0_batch3,
    topic1_batch1, topic1_batch2, topic1_batch3,
    topic2_batch1, topic2_batch2, topic2_batch3,
    topic3_batch1, topic3_batch2, topic3_batch3,
]

In [183]:
# Stack every batch row-wise into one frame; ignore_index=False means each batch's
# own row labels are carried over unchanged
result = pd.concat(frames, axis=0, ignore_index=False)

In [184]:
# Replace the row labels carried over from the individual batches with a fresh
# 0..n-1 index; drop=True discards the old labels instead of keeping them as a column
result = result.reset_index(drop=True)

In [185]:
# Preview the first rows (at most 20) of the combined, re-indexed frame
result.head(20)

Unnamed: 0,Document,Topic,Name,intruder
0,I guess the government finally gave Trump an official document he CAN keep!\n#Subpoena #January6thCommitteeHearings,0,0_subpoena_committee_subpoenaed_trump,0
1,Trump responds to Jan. 6 committee subpoena vote [READ FULL MEMO]\nhttps://t.co/slB55c6ydD\n#January6thCommitteeHearings \n#CapitolRiot \n@WBAP247NEWS,0,0_subpoena_committee_subpoenaed_trump,0
2,"RT @PithyTruth: @JonathanTurley BEING A TRUMP APOLOGIST, you criticize subpoena for him being issued today, but say nothing about #January6…",0,0_subpoena_committee_subpoenaed_trump,0
3,"Jan. 6 panel subpoenas Trump, shows startling new video\n#January6thCommitteeHearings\n#TrumpCoupAttempt\n#TrumpIsGoingToJail \nhttps://t.co/dcO5CMjPcH",0,0_subpoena_committee_subpoenaed_trump,0
4,"#HATH #January6thCommitteeHearings \n#Pennsylvania 🚨 #Mastriano @ 2.20.21\n""Donald Trump isn't done. Having been w/ him quite a bit. He's the real deal. Rebbie prayed w/him last month. We need a breakthrough on the anniversary of Esther's big moment""\nCC @andre_gagne1 @jennycohn1 https://t.co/euJndumK3K",1,1_trumpcoupattempt_trump_day_trumpisgoingtojail,1
5,RT @winnie_LaLisa: Donald John Trump was unanimously voted to receive a subpoena by the Jan 6 committee. The MAGA Republicans are fuming #J…,0,0_subpoena_committee_subpoenaed_trump,0
6,RT @allegedlyangelo: THEY ARE ACTUALLY SUBPOENAING TRUMP AND HE HAS TO TESTIFY #January6thCommitteeHearings https://t.co/mrcTX4gFxx,0,0_subpoena_committee_subpoenaed_trump,0
7,"While it’s nice to see the Committee subpoena Trump, ultimately, nothing will come from it. He literally stole a foreign nation’s nuclear specs and nothing happened. An insurrection sure as hell doesn’t matter. Nothing will happen, sadly.\n\n#January6thCommitteeHearings",0,0_subpoena_committee_subpoenaed_trump,0
8,"On the same day treasonous Donald Trump was subpoenaed during the #January6thCommitteeHearings, the U.S. Supreme Court refused to intervene in the Mar-a-Lago classified documents case against the criminal former president. \n\nhttps://t.co/orDbhOSkte",0,0_subpoena_committee_subpoenaed_trump,0
9,@duty2warn #Roevember and\n#ForThePeople and\n#VoteBlueIn2022 and \n#StandWithUkraine️ and\n#PeopleOverPolitics and\n#OathBreakerMAGA and\n#LGBTQHistoryMonth and\n#InflationReductionAct and\n#RepublicanWarOnSeniors and\n#SocialSecurityIsOurMoney and\n#January6thCommitteeHearings...,2,2_and_forthepeople_peopleoverpolitics_standwithukraine,1


In [186]:
# Add an 'intruder_index' column initialised to None for every row;
# the following cell fills it in for the intruder rows only
result['intruder_index'] = None

In [187]:
# For rows flagged as intruders, store the row's own index label in 'intruder_index';
# rows that are not intruders are left untouched
is_intruder = result['intruder'] == True
result.loc[is_intruder, 'intruder_index'] = result.index[is_intruder]

In [188]:
# Display the combined frame, now including the 'intruder_index' column
result

Unnamed: 0,Document,Topic,Name,intruder,intruder_index
0,I guess the government finally gave Trump an official document he CAN keep!\n#Subpoena #January6thCommitteeHearings,0,0_subpoena_committee_subpoenaed_trump,0,
1,Trump responds to Jan. 6 committee subpoena vote [READ FULL MEMO]\nhttps://t.co/slB55c6ydD\n#January6thCommitteeHearings \n#CapitolRiot \n@WBAP247NEWS,0,0_subpoena_committee_subpoenaed_trump,0,
2,"RT @PithyTruth: @JonathanTurley BEING A TRUMP APOLOGIST, you criticize subpoena for him being issued today, but say nothing about #January6…",0,0_subpoena_committee_subpoenaed_trump,0,
3,"Jan. 6 panel subpoenas Trump, shows startling new video\n#January6thCommitteeHearings\n#TrumpCoupAttempt\n#TrumpIsGoingToJail \nhttps://t.co/dcO5CMjPcH",0,0_subpoena_committee_subpoenaed_trump,0,
4,"#HATH #January6thCommitteeHearings \n#Pennsylvania 🚨 #Mastriano @ 2.20.21\n""Donald Trump isn't done. Having been w/ him quite a bit. He's the real deal. Rebbie prayed w/him last month. We need a breakthrough on the anniversary of Esther's big moment""\nCC @andre_gagne1 @jennycohn1 https://t.co/euJndumK3K",1,1_trumpcoupattempt_trump_day_trumpisgoingtojail,1,4.0
5,RT @winnie_LaLisa: Donald John Trump was unanimously voted to receive a subpoena by the Jan 6 committee. The MAGA Republicans are fuming #J…,0,0_subpoena_committee_subpoenaed_trump,0,
6,RT @allegedlyangelo: THEY ARE ACTUALLY SUBPOENAING TRUMP AND HE HAS TO TESTIFY #January6thCommitteeHearings https://t.co/mrcTX4gFxx,0,0_subpoena_committee_subpoenaed_trump,0,
7,"While it’s nice to see the Committee subpoena Trump, ultimately, nothing will come from it. He literally stole a foreign nation’s nuclear specs and nothing happened. An insurrection sure as hell doesn’t matter. Nothing will happen, sadly.\n\n#January6thCommitteeHearings",0,0_subpoena_committee_subpoenaed_trump,0,
8,"On the same day treasonous Donald Trump was subpoenaed during the #January6thCommitteeHearings, the U.S. Supreme Court refused to intervene in the Mar-a-Lago classified documents case against the criminal former president. \n\nhttps://t.co/orDbhOSkte",0,0_subpoena_committee_subpoenaed_trump,0,
9,@duty2warn #Roevember and\n#ForThePeople and\n#VoteBlueIn2022 and \n#StandWithUkraine️ and\n#PeopleOverPolitics and\n#OathBreakerMAGA and\n#LGBTQHistoryMonth and\n#InflationReductionAct and\n#RepublicanWarOnSeniors and\n#SocialSecurityIsOurMoney and\n#January6thCommitteeHearings...,2,2_and_forthepeople_peopleoverpolitics_standwithukraine,1,9.0


In [189]:
# Optional export (disabled): uncomment to write the combined batches to a CSV file
#result.to_csv('bertopic_no_dupes.csv')

# **KeyBERT-Inspired Model**
- `KeyBERTInspired` reduces the appearance of stop words in the topic keywords, which often improves the topic representation. References:
- https://maartengr.github.io/BERTopic/api/representation/keybert.html
- https://maartengr.github.io/BERTopic/getting_started/representation/representation.html

In [40]:
from bertopic.representation import KeyBERTInspired

# Conceptually: BERTopic is the architectural blueprint for a house, and KeyBERTInspired
# is a specific feature that can be built into a new house made from that blueprint.
representation_model = KeyBERTInspired()

# Reuse the sentence-transformer and the tweet embeddings computed earlier in the
# notebook instead of letting BERTopic load its default English model and re-encode
# every tweet. This avoids a second encoding pass and guarantees the fitted model
# lives in the same embedding space as `embeddings`, which the document plots use.
keybert_model = BERTopic(
    language="english",
    calculate_probabilities=True,
    verbose=False,
    embedding_model=sentence_model,
    umap_model=umap_model,
    representation_model=representation_model,
).fit(tweet_list, embeddings=embeddings)

In [None]:
# Optional (disabled): uncomment to show full, untruncated text in DataFrame columns
#pd.set_option('display.max_colwidth', None)

#### KeyBERT topic Representations

In [45]:
# Topic-level summary table for the KeyBERT-inspired model
# (columns: Topic, Count, Name, Representation, Representative_Docs)
keybert_info = keybert_model.get_topic_info()

In [46]:
# Preview the first 8 rows of the topic summary
keybert_info.head(8)

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,2071,-1_january6thcommitteehearings_january_electio...,"[january6thcommitteehearings, january, electio...",[#January6thCommitteeHearings Trump's *own* ca...
1,0,207,0_trumpsubpoena_subpoenadonaldtrump_subpoena_s...,"[trumpsubpoena, subpoenadonaldtrump, subpoena,...",[RT @votevets: BREAKING: The @January6thCmte h...
2,1,199,1_trumpisgoingtoprison_trumpisgoingtojail_trum...,"[trumpisgoingtoprison, trumpisgoingtojail, tru...",[@TheDailyShow Let's just cut to the Chase \na...
3,2,163,2_republicanwaronseniors_lgbtqhistorymonth_sub...,"[republicanwaronseniors, lgbtqhistorymonth, su...",[@santiagomayer_ #Roevember and\n#ForThePeople...
4,3,148,3_january6thcommitteehearing_january6thcommitt...,"[january6thcommitteehearing, january6thcommitt...",[Next Role: @RepLizCheney I know the curtain c...
5,4,139,4_pelosi_pelosiisahero_january6thcommitteehear...,"[pelosi, pelosiisahero, january6thcommitteehea...",[I'm a bigger fan of the speaker of the house ...
6,5,104,5_january6thcommitteehearings_jan_speakerpelos...,"[january6thcommitteehearings, jan, speakerpelo...",[@therecount @TheRickWilson @SpeakerPelosi wor...
7,6,97,6_testify_testifying_testified_january6thcommi...,"[testify, testifying, testified, january6thcom...",[#January6thCommitteeHearings If Trump wants A...


In [47]:
# (rows, columns) of the topic summary table
keybert_info.shape

(87, 5)

#### KeyBERT Get Document Info
Use this table as the starting point for developing the dataframe for the intruder work.

In [48]:
# Per-document table for the KeyBERT-inspired model (assigned topic, topic name,
# probability, ...). The result is only displayed here, not stored in a variable.
keybert_model.get_document_info(tweet_list)

Unnamed: 0,Document,Topic,Name,Representation,Representative_Docs,Top_n_words,Probability,Representative_document
0,@ReallyAmerican1 #Roevember and\n#ForThePeople...,2,2_republicanwaronseniors_lgbtqhistorymonth_sub...,"[republicanwaronseniors, lgbtqhistorymonth, su...",[@santiagomayer_ #Roevember and\n#ForThePeople...,republicanwaronseniors - lgbtqhistorymonth - s...,0.568121,False
1,RT @sandibachom: IS THIS THING ON???!!This is ...,62,62_guard_miller_sec_defense,"[guard, miller, sec, defense, dod, failedjanua...",[RT @sandibachom: IS THIS THING ON???!!This is...,guard - miller - sec - defense - dod - failedj...,1.000000,True
2,RT @tleehumphrey: Today is the beginning of th...,-1,-1_january6thcommitteehearings_january_electio...,"[january6thcommitteehearings, january, electio...",[#January6thCommitteeHearings Trump's *own* ca...,january6thcommitteehearings - january - electi...,0.606865,False
3,RT @AdamKinzinger: Mitch McConnell.\nKevin McC...,-1,-1_january6thcommitteehearings_january_electio...,"[january6thcommitteehearings, january, electio...",[#January6thCommitteeHearings Trump's *own* ca...,january6thcommitteehearings - january - electi...,0.729519,False
4,RT @libradunn: That shit was heavy. ✌🏽 y'all t...,3,3_january6thcommitteehearing_january6thcommitt...,"[january6thcommitteehearing, january6thcommitt...",[Next Role: @RepLizCheney I know the curtain c...,january6thcommitteehearing - january6thcommitt...,0.219042,False
...,...,...,...,...,...,...,...,...
5328,#January6thCommitteeHearings inflation is eati...,18,18_electionday_republicanshavenosolutions_demo...,"[electionday, republicanshavenosolutions, demo...",[When the #January6thCommitteeHearings wants t...,electionday - republicanshavenosolutions - dem...,0.248064,False
5329,Can we all agree that Donald Trump is the bigg...,22,22_traitorstoamerica_traitor_traitortrump_trai...,"[traitorstoamerica, traitor, traitortrump, tra...",[RT @Resist_MAGA_GOP: Can we all agree that Do...,traitorstoamerica - traitor - traitortrump - t...,0.112006,True
5330,@RonnyJacksonTX Hmm did you just not watch &am...,-1,-1_january6thcommitteehearings_january_electio...,"[january6thcommitteehearings, january, electio...",[#January6thCommitteeHearings Trump's *own* ca...,january6thcommitteehearings - january - electi...,0.343507,False
5331,#January6thCommitteeHearings and everyone runn...,11,11_january6thcommitteehearings_january6thheari...,"[january6thcommitteehearings, january6thhearin...",[nuclear war ain't so bad\n#January6thCommitte...,january6thcommitteehearings - january6thhearin...,0.366186,False


## Visualize Terms

In [53]:
# Bar charts of the top terms per topic for `topic_model`, limited to 7 topics;
# compare with the same chart for `keybert_model` in the next cell
topic_model.visualize_barchart(top_n_topics=7)

In [54]:
# Bar charts of the top terms per topic for the KeyBERT-inspired model, limited to 7 topics
keybert_model.visualize_barchart(top_n_topics=7)

## Visualize documents
Two-dimensional projection of the document embeddings.

- Parameters for scaling the plot down: https://maartengr.github.io/BERTopic/api/plotting/documents.html#bertopic.plotting._documents.visualize_documents
- Hover text: https://maartengr.github.io/BERTopic/getting_started/visualization/visualize_documents.html

#### BERTopic Docs + Topics

In [52]:
# Plot the documents using the precomputed sentence embeddings, restricted to topics 1-5
topic_model.visualize_documents(
    tweet_list,
    embeddings=embeddings,
    topics=[1, 2, 3, 4, 5],
)