# **BERTopic Modeling**
- Uses GPT-4 (through BERTopic's OpenAI representation model) for topic labeling
- Creates intruder batches for human topic-coherence testing

# Imports

In [1]:
import pandas as pd

In [2]:
import random

In [3]:
from bertopic import BERTopic

  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()


# Data

In [4]:
# Load the tweet export into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; the cell only runs where that file exists.
tweets = pd.read_csv(
    filepath_or_buffer='/Users/jennihawk/Documents/Data Science Projects/Project_NLP/TweetBatch3.csv'
)

In [5]:
tweets.head()

Unnamed: 0,text,cleaned
0,@ReallyAmerican1 #Roevember and\n#ForThePeople...,roevember and forthepeople and votebluein2022...
1,RT @sandibachom: IS THIS THING ON???!!This is ...,rt is this thing on this is pathetic acting se...
2,RT @sandibachom: IS THIS THING ON???!!This is ...,rt is this thing on this is pathetic acting se...
3,RT @tleehumphrey: Today is the beginning of th...,rt today is the beginning of the inquiry into ...
4,RT @AdamKinzinger: Mitch McConnell.\nKevin McC...,rt mitch mcconnell kevin mccarthy they both kn...


In [6]:
# Remove the pre-cleaned text column; only the original "text" column is used below.
# Rebinding with errors='ignore' (instead of an in-place drop) keeps this cell
# re-runnable: a second run no longer raises KeyError for the already-removed column.
tweets = tweets.drop(columns=['cleaned'], errors='ignore')

In [7]:
tweets.head()

Unnamed: 0,text
0,@ReallyAmerican1 #Roevember and\n#ForThePeople...
1,RT @sandibachom: IS THIS THING ON???!!This is ...
2,RT @sandibachom: IS THIS THING ON???!!This is ...
3,RT @tleehumphrey: Today is the beginning of th...
4,RT @AdamKinzinger: Mitch McConnell.\nKevin McC...


In [8]:
tweets.shape

(34993, 1)

In [9]:
# Pull the "text" column out as a plain Python list, one entry per tweet.
# NOTE(review): entries are strings only if the column has no missing values — confirm.
tweet_list = tweets.loc[:, "text"].to_list()

# **Topic Modeling**



## OpenAI

In [10]:
from umap import UMAP
from sentence_transformers import SentenceTransformer

In [11]:
#installed with conda forge https://anaconda.org/conda-forge/openai
import openai

In [12]:
from bertopic.representation import OpenAI

In [13]:
# A fixed random_state removes UMAP's run-to-run randomness so results can be replicated.
umap_model = UMAP(
    n_neighbors=15,
    n_components=5,
    min_dist=0.0,
    metric='cosine',
    random_state=42,
)

In [14]:
# Fine-tune topic representations with GPT-4.
# The API key is read from the OPENAI_API_KEY environment variable instead of
# being hard-coded, so the credential is never stored in the saved notebook.
# To keep the key in a local .env file, load it into the environment first with
# python-dotenv: https://github.com/theskumar/python-dotenv
import os

openai_api_key = os.environ.get("OPENAI_API_KEY")
if not openai_api_key:
    # Fail here with a clear message rather than later inside the model fit.
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this cell.")
openai.api_key = openai_api_key
representation_model = OpenAI(model="gpt-4", chat=True)

In [15]:
# Build the topic model with the seeded UMAP reducer and the GPT-based
# representation model, then fit it on the tweet texts in one chained call.
openai_model = BERTopic(
    language="english",
    calculate_probabilities=True,
    verbose=False,
    umap_model=umap_model,
    representation_model=representation_model,
).fit(tweet_list)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

## Extracting Topics - Most Frequent Topics

In [16]:
# Per-topic summary table from the fitted model (columns shown below: Topic, Count, Name, Representation, Representative_Docs)
openai_topic_info = openai_model.get_topic_info()

In [17]:
# Turn off column-width truncation so full topic labels and tweets are visible in displayed tables
pd.set_option('display.max_colwidth', None)

In [18]:
openai_topic_info.head(20)

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,2695,-1_January 6th Committee Hearings and Alleged Trump Riot Coverup,[January 6th Committee Hearings and Alleged Trump Riot Coverup],"[RT @StandForBetter: The Secret Service clearly has been involved in a coverup of the Trump riot at The Capitol.\n\n#TrumpsCoupPlaybook\n#Janua…, RT @StandForBetter: The Secret Service clearly has been involved in a coverup of the Trump riot at The Capitol.\n\n#TrumpsCoupPlaybook\n#Janua…, How does it feel to know that YOUR PRESIDENT FAILED YOU ON JAN. 6th… “ I’ll be there with you “ \n\nNow, Trump has to appear before Congress.\n#January6thCommitteeHearings]"
1,0,6006,0_Reactions of Mitch McConnell and Kevin McCarthy to Trump's Actions,[Reactions of Mitch McConnell and Kevin McCarthy to Trump's Actions],"[RT @AdamKinzinger: Mitch McConnell.\nKevin McCarthy.\nThey both knew Trump was responsible. \nThey both called him out.\nThey both backed down.…, RT @AdamKinzinger: Mitch McConnell.\nKevin McCarthy.\nThey both knew Trump was responsible. \nThey both called him out.\nThey both backed down.…, RT @AdamKinzinger: Mitch McConnell.\nKevin McCarthy.\nThey both knew Trump was responsible. \nThey both called him out.\nThey both backed down.…]"
2,1,1368,1_Trump's Alleged Coup Attempt,[Trump's Alleged Coup Attempt],"[RT @StandForBetter: 📺 NEW Video:\n\nTrump Lost, yet he decided to hatch a coup with the help of Mark Meadows, Roger Stone, Rudy Giuliani, and…, RT @StandForBetter: 📺 NEW Video:\n\nTrump Lost, yet he decided to hatch a coup with the help of Mark Meadows, Roger Stone, Rudy Giuliani, and…, RT @StandForBetter: 📺 NEW Video:\n\nTrump Lost, yet he decided to hatch a coup with the help of Mark Meadows, Roger Stone, Rudy Giuliani, and…]"
3,2,1176,2_Unanimous vote to subpoena Donald Trump,[Unanimous vote to subpoena Donald Trump],"[RT @AdamKinzinger: We just voted unanimously to subpoena Donald Trump to testify UNDER OATH.\nDemocracy demands it.\nHistory deserves it.\n#Ja…, RT @AdamKinzinger: We just voted unanimously to subpoena Donald Trump to testify UNDER OATH.\nDemocracy demands it.\nHistory deserves it.\n#Ja…, RT @AdamKinzinger: We just voted unanimously to subpoena Donald Trump to testify UNDER OATH.\nDemocracy demands it.\nHistory deserves it.\n#Ja…]"
4,3,1117,"3_Democracy, Violence and Love for Country","[Democracy, Violence and Love for Country]","[RT @Resist_MAGA_GOP: Mark Hamill is correct. You can love this country without creating violence by overthrowing democracy. #January6thComm…, RT @Resist_MAGA_GOP: Mark Hamill is correct. You can love this country without creating violence by overthrowing democracy. #January6thComm…, RT @Resist_MAGA_GOP: Mark Hamill is correct. You can love this country without creating violence by overthrowing democracy. #January6thComm…]"
5,4,1106,4_Acting Defense Sec Chris Miller's Authority to Deploy National Guard,[Acting Defense Sec Chris Miller's Authority to Deploy National Guard],"[RT @sandibachom: IS THIS THING ON???!!This is pathetic. ACTING Sec of Defense Chris Miller was the ONLY person who can deploy the national…, RT @sandibachom: IS THIS THING ON???!!This is pathetic. ACTING Sec of Defense Chris Miller was the ONLY person who can deploy the national…, RT @sandibachom: IS THIS THING ON???!!This is pathetic. ACTING Sec of Defense Chris Miller was the ONLY person who can deploy the national…]"
6,5,1060,5_Trump's Role in January 6th Attack,[Trump's Role in January 6th Attack],"[RT @AdamKinzinger: Trump is the author of the January 6th attack. \nTrump summoned the rioters.\nTrump goaded on their violence.\nTrump excuse…, RT @AdamKinzinger: Trump is the author of the January 6th attack. \nTrump summoned the rioters.\nTrump goaded on their violence.\nTrump excuse…, RT @AdamKinzinger: Trump is the author of the January 6th attack. \nTrump summoned the rioters.\nTrump goaded on their violence.\nTrump excuse…]"
7,6,949,6_Trump's Rejection and Attempt to Break Democracy,[Trump's Rejection and Attempt to Break Democracy],"[RT @AdamKinzinger: When he couldn't accept his loss - his rejection by the American people - Trump tried to break our democracy. He decided…, RT @AdamKinzinger: When he couldn't accept his loss - his rejection by the American people - Trump tried to break our democracy. He decided…, RT @AdamKinzinger: When he couldn't accept his loss - his rejection by the American people - Trump tried to break our democracy. He decided…]"
8,7,712,7_January 6th Committee Hearings & Trump Coup Attempt,[January 6th Committee Hearings & Trump Coup Attempt],"[RT @StandForBetter: This says it all.\n\n#TrumpCoupAttempt\n#January6thCommitteeHearings https://t.co/95KjEX8fAF, RT @StandForBetter: This says it all.\n\n#TrumpCoupAttempt\n#January6thCommitteeHearings https://t.co/95KjEX8fAF, RT @StandForBetter: This says it all.\n\n#TrumpCoupAttempt\n#January6thCommitteeHearings https://t.co/95KjEX8fAF]"
9,8,629,8_Law Enforcement's Inaction on White Nationalist Warnings,[Law Enforcement's Inaction on White Nationalist Warnings],"[RT @chadloder: The reason law enforcement failed to act on the flood of warnings about the plans for a violent, armed white nationalist ins…, RT @chadloder: The reason law enforcement failed to act on the flood of warnings about the plans for a violent, armed white nationalist ins…, RT @chadloder: The reason law enforcement failed to act on the flood of warnings about the plans for a violent, armed white nationalist ins…]"


In [29]:
#openai_topic_info.to_csv('gpt4_frequent_topics_retweets.csv')

#### OpenAI Get Document Info

In [30]:
# Per-document table: each tweet in tweet_list with its assigned topic, topic label and probability
openai_doc_info = openai_model.get_document_info(tweet_list)

In [31]:
openai_doc_info

Unnamed: 0,Document,Topic,Name,Representation,Representative_Docs,Top_n_words,Probability,Representative_document
0,@ReallyAmerican1 #Roevember and\n#ForThePeople and\n#VoteBlueIn2022 and \n#StandWithUkraine️ and\n#PeopleOverPolitics and\n#OathBreakerMAGA and\n#LGBTQHistoryMonth and\n#InflationReductionAct and\n#RepublicanWarOnSeniors and\n#SocialSecurityIsOurMoney and\n#January6thCommitteeHearings...,25,25_Political Activism and Advocacy on Social Issues,[Political Activism and Advocacy on Social Issues],"[@michael_muscato #Roevember and\n#ForThePeople and\n#VoteBlueIn2022 and \n#StandWithUkraine️ and\n#PeopleOverPolitics and\n#OathBreakerMAGA and\n#LGBTQHistoryMonth and\n#InflationReductionAct and\n#RepublicanWarOnSeniors and\n#SocialSecurityIsOurMoney and\n#January6thCommitteeHearings..., @santiagomayer_ #Roevember and\n#ForThePeople and\n#VoteBlueIn2022 and \n#StandWithUkraine️ and\n#PeopleOverPolitics and\n#OathBreakerMAGA and\n#LGBTQHistoryMonth and\n#InflationReductionAct and\n#RepublicanWarOnSeniors and\n#SocialSecurityIsOurMoney and\n#January6thCommitteeHearings..., @ashley_ehasz #Roevember and\n#ForThePeople and\n#VoteBlueIn2022 and \n#StandWithUkraine️ and\n#PeopleOverPolitics and\n#OathBreakerMAGA and\n#LGBTQHistoryMonth and\n#InflationReductionAct and\n#RepublicanWarOnSeniors and\n#SocialSecurityIsOurMoney and\n#January6thCommitteeHearings...]",Political Activism and Advocacy on Social Issues,0.233988,False
1,RT @sandibachom: IS THIS THING ON???!!This is pathetic. ACTING Sec of Defense Chris Miller was the ONLY person who can deploy the national…,4,4_Acting Defense Sec Chris Miller's Authority to Deploy National Guard,[Acting Defense Sec Chris Miller's Authority to Deploy National Guard],"[RT @sandibachom: IS THIS THING ON???!!This is pathetic. ACTING Sec of Defense Chris Miller was the ONLY person who can deploy the national…, RT @sandibachom: IS THIS THING ON???!!This is pathetic. ACTING Sec of Defense Chris Miller was the ONLY person who can deploy the national…, RT @sandibachom: IS THIS THING ON???!!This is pathetic. ACTING Sec of Defense Chris Miller was the ONLY person who can deploy the national…]",Acting Defense Sec Chris Miller's Authority to Deploy National Guard,1.000000,True
2,RT @sandibachom: IS THIS THING ON???!!This is pathetic. ACTING Sec of Defense Chris Miller was the ONLY person who can deploy the national…,4,4_Acting Defense Sec Chris Miller's Authority to Deploy National Guard,[Acting Defense Sec Chris Miller's Authority to Deploy National Guard],"[RT @sandibachom: IS THIS THING ON???!!This is pathetic. ACTING Sec of Defense Chris Miller was the ONLY person who can deploy the national…, RT @sandibachom: IS THIS THING ON???!!This is pathetic. ACTING Sec of Defense Chris Miller was the ONLY person who can deploy the national…, RT @sandibachom: IS THIS THING ON???!!This is pathetic. ACTING Sec of Defense Chris Miller was the ONLY person who can deploy the national…]",Acting Defense Sec Chris Miller's Authority to Deploy National Guard,0.586859,True
3,RT @tleehumphrey: Today is the beginning of the inquiry into the Trudeau gov’s use of the Emergencies Act against the #FreedomConvoy so wha…,104,104_Inquiry into Trudeau Gov's use of Emergencies Act against FreedomConvoy,[Inquiry into Trudeau Gov's use of Emergencies Act against FreedomConvoy],"[RT @tleehumphrey: Today is the beginning of the inquiry into the Trudeau gov’s use of the Emergencies Act against the #FreedomConvoy so wha…, RT @tleehumphrey: Today is the beginning of the inquiry into the Trudeau gov’s use of the Emergencies Act against the #FreedomConvoy so wha…, RT @tleehumphrey: Today is the beginning of the inquiry into the Trudeau gov’s use of the Emergencies Act against the #FreedomConvoy so wha…]",Inquiry into Trudeau Gov's use of Emergencies Act against FreedomConvoy,1.000000,True
4,RT @AdamKinzinger: Mitch McConnell.\nKevin McCarthy.\nThey both knew Trump was responsible. \nThey both called him out.\nThey both backed down.…,0,0_Reactions of Mitch McConnell and Kevin McCarthy to Trump's Actions,[Reactions of Mitch McConnell and Kevin McCarthy to Trump's Actions],"[RT @AdamKinzinger: Mitch McConnell.\nKevin McCarthy.\nThey both knew Trump was responsible. \nThey both called him out.\nThey both backed down.…, RT @AdamKinzinger: Mitch McConnell.\nKevin McCarthy.\nThey both knew Trump was responsible. \nThey both called him out.\nThey both backed down.…, RT @AdamKinzinger: Mitch McConnell.\nKevin McCarthy.\nThey both knew Trump was responsible. \nThey both called him out.\nThey both backed down.…]",Reactions of Mitch McConnell and Kevin McCarthy to Trump's Actions,0.201421,True
...,...,...,...,...,...,...,...,...
34988,RT @Adrian_Fontes: The January 6th committee just concluded its final hearing ahead of the midterms. These brave leaders remind us that we…,42,42_January 6th Committee Final Hearing,[January 6th Committee Final Hearing],"[RT @Adrian_Fontes: The January 6th committee just concluded its final hearing ahead of the midterms. These brave leaders remind us that we…, RT @Adrian_Fontes: The January 6th committee just concluded its final hearing ahead of the midterms. These brave leaders remind us that we…, RT @Adrian_Fontes: The January 6th committee just concluded its final hearing ahead of the midterms. These brave leaders remind us that we…]",January 6th Committee Final Hearing,0.138174,True
34989,#January6thCommitteeHearings and everyone running them are 🤡🌎 https://t.co/w3YONp1tdH,16,16_Ron4California's January 6th Committee Hearings Notifications,[Ron4California's January 6th Committee Hearings Notifications],"[RT @Ron4California: Every time the #January6thCommitteeHearings are on, I get more notifications about this., RT @Ron4California: Every time the #January6thCommitteeHearings are on, I get more notifications about this., RT @Ron4California: Every time the #January6thCommitteeHearings are on, I get more notifications about this.]",Ron4California's January 6th Committee Hearings Notifications,0.029946,False
34990,RT @sandibachom: IS THIS THING ON???!!This is pathetic. ACTING Sec of Defense Chris Miller was the ONLY person who can deploy the national…,4,4_Acting Defense Sec Chris Miller's Authority to Deploy National Guard,[Acting Defense Sec Chris Miller's Authority to Deploy National Guard],"[RT @sandibachom: IS THIS THING ON???!!This is pathetic. ACTING Sec of Defense Chris Miller was the ONLY person who can deploy the national…, RT @sandibachom: IS THIS THING ON???!!This is pathetic. ACTING Sec of Defense Chris Miller was the ONLY person who can deploy the national…, RT @sandibachom: IS THIS THING ON???!!This is pathetic. ACTING Sec of Defense Chris Miller was the ONLY person who can deploy the national…]",Acting Defense Sec Chris Miller's Authority to Deploy National Guard,0.162626,True
34991,So they are gonna subpoena Trump I am guessing they stop calling him the 45th and just start calling him the 5th 🤣🤣🤣🤣🤣🤣🤣#January6thCommitteeHearings,94,94_January 6th Committee's Planned Subpoena for Donald Trump,[January 6th Committee's Planned Subpoena for Donald Trump],"[RT @CyrusMMcQueen: It appears the January 6th Committee is prepared to subpoena Donald Trump… Until of course one of his paid troglodytes i…, RT @CyrusMMcQueen: It appears the January 6th Committee is prepared to subpoena Donald Trump… Until of course one of his paid troglodytes i…, RT @CyrusMMcQueen: It appears the January 6th Committee is prepared to subpoena Donald Trump… Until of course one of his paid troglodytes i…]",January 6th Committee's Planned Subpoena for Donald Trump,0.105250,False


In [32]:
# Save the per-document topic assignments as a CSV in the current working directory
openai_doc_info.to_csv('gpt4_all_content_retweets.csv')

# Create Intruder Batches For Human Coherence Testing

### BERTopic Intruder Batches

In [24]:
# Working copy of the per-document table; unnecessary columns are stripped from it
# before it is run through the batch function, leaving openai_doc_info untouched.
openai_doc_info2 = openai_doc_info.copy(deep=True)

In [25]:
# Keep only the Document, Topic and Name columns for batch creation.
# The result is derived from openai_doc_info with a non-mutating drop, so this
# cell can be re-run safely; the previous in-place drop raised KeyError on a
# second run because the columns were already gone.
openai_doc_info2 = openai_doc_info.drop(
    columns=['Representation', 'Representative_Docs', 'Top_n_words', 'Probability', 'Representative_document']
)

In [26]:
openai_doc_info2.head()

Unnamed: 0,Document,Topic,Name
0,@ReallyAmerican1 #Roevember and\n#ForThePeople and\n#VoteBlueIn2022 and \n#StandWithUkraine️ and\n#PeopleOverPolitics and\n#OathBreakerMAGA and\n#LGBTQHistoryMonth and\n#InflationReductionAct and\n#RepublicanWarOnSeniors and\n#SocialSecurityIsOurMoney and\n#January6thCommitteeHearings...,25,25_Political Activism and Advocacy on Social Issues
1,RT @sandibachom: IS THIS THING ON???!!This is pathetic. ACTING Sec of Defense Chris Miller was the ONLY person who can deploy the national…,4,4_Acting Defense Sec Chris Miller's Authority to Deploy National Guard
2,RT @sandibachom: IS THIS THING ON???!!This is pathetic. ACTING Sec of Defense Chris Miller was the ONLY person who can deploy the national…,4,4_Acting Defense Sec Chris Miller's Authority to Deploy National Guard
3,RT @tleehumphrey: Today is the beginning of the inquiry into the Trudeau gov’s use of the Emergencies Act against the #FreedomConvoy so wha…,104,104_Inquiry into Trudeau Gov's use of Emergencies Act against FreedomConvoy
4,RT @AdamKinzinger: Mitch McConnell.\nKevin McCarthy.\nThey both knew Trump was responsible. \nThey both called him out.\nThey both backed down.…,0,0_Reactions of Mitch McConnell and Kevin McCarthy to Trump's Actions


In [27]:
def create_batch(df, main_topic, specific_topic, n_main=4, n_intruders=1, random_state=None):
    """Build one shuffled intruder-detection batch for human coherence testing.

    The batch holds ``n_main`` documents sampled from ``main_topic`` (flagged
    ``intruder = 0``) and ``n_intruders`` documents sampled from
    ``specific_topic`` (flagged ``intruder = 1``), returned in random order.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per document. Must contain a ``'Name'`` column with the topic
        label; every other column is carried through to the output unchanged.
    main_topic : str
        ``'Name'`` value of the topic the regular documents are drawn from.
    specific_topic : str
        ``'Name'`` value of the topic the intruder documents are drawn from.
        Must differ from ``main_topic``.
    n_main : int, default 4
        Number of documents sampled (without replacement) from ``main_topic``.
    n_intruders : int, default 1
        Number of documents sampled (without replacement) from ``specific_topic``.
    random_state : int or None, default None
        Seed for both the sampling and the shuffle. ``None`` keeps the unseeded
        behaviour (pandas' default sampling and the global ``random`` state).

    Returns
    -------
    list of dict
        One dict per document: the row's columns plus an ``'intruder'`` key
        (0 for main-topic documents, 1 for intruders).

    Raises
    ------
    ValueError
        If either topic is absent from ``df['Name']``, if both topics are the
        same, or if a topic has fewer documents than requested.
    """
    main_docs = df[df['Name'] == main_topic]
    if main_docs.empty:
        raise ValueError(f"Main topic '{main_topic}' not found in the DataFrame.")

    # An "intruder" drawn from the main topic itself would make the test meaningless.
    if specific_topic == main_topic:
        raise ValueError(
            f"Intruder topic must differ from the main topic '{main_topic}'."
        )

    intruder_docs = df[df['Name'] == specific_topic]
    if intruder_docs.empty:
        raise ValueError(f"Intruder topic '{specific_topic}' not found in the DataFrame.")

    # Check sizes up front so the caller gets the topic name in the error
    # instead of pandas' generic "cannot take a larger sample" message.
    if len(main_docs) < n_main:
        raise ValueError(
            f"Main topic '{main_topic}' has only {len(main_docs)} document(s); "
            f"{n_main} are required."
        )
    if len(intruder_docs) < n_intruders:
        raise ValueError(
            f"Intruder topic '{specific_topic}' has only {len(intruder_docs)} document(s); "
            f"{n_intruders} are required."
        )

    # Sample the regular documents and mark them as non-intruders.
    batch = [
        {**record, 'intruder': 0}
        for record in main_docs.sample(n=n_main, random_state=random_state).to_dict('records')
    ]

    # Sample the intruder documents from the other topic and mark them as intruders.
    batch.extend(
        {**record, 'intruder': 1}
        for record in intruder_docs.sample(n=n_intruders, random_state=random_state).to_dict('records')
    )

    # Shuffle so the intruder's position in the batch is random.
    if random_state is None:
        random.shuffle(batch)
    else:
        random.Random(random_state).shuffle(batch)

    return batch

In [28]:
#pd.set_option('display.max_colwidth', None)

In [34]:
# #creates list of dicts
# topic0_batch1 = create_batch(openai_doc_info2, "0_Politicians' Response to Trump's Responsibility", "1_Trump's coup attempt with the help of Giuliani, Meadows, Stone, and others")
# #creates dataframe from list of dicts
# topic0_batch1 = pd.DataFrame.from_records(topic0_batch1)

In [35]:
# #creates list of dicts
# topic0_batch2 = create_batch(openai_doc_info2, "0_Politicians' Response to Trump's Responsibility", "2_Testifying Donald Trump under oath")
# #creates dataframe from list of dicts
# topic0_batch2 = pd.DataFrame.from_records(topic0_batch2)

In [36]:
# #creates list of dicts
# topic0_batch3 = create_batch(openai_doc_info2, "0_Politicians' Response to Trump's Responsibility", "3_Mark Hamill's views on loving the country without creating violence by overthrowing democracy #January6thComm")
# #creates dataframe from list of dicts
# topic0_batch3 = pd.DataFrame.from_records(topic0_batch3)

In [37]:
# #creates list of dicts
# topic0_batch4 = create_batch(openai_doc_info2, "0_Politicians' Response to Trump's Responsibility", "4_Deployment of Acting Sec of Defense Chris Miller by National Guard denied")
# #creates dataframe from list of dicts
# topic0_batch4 = pd.DataFrame.from_records(topic0_batch4)

In [38]:
# #creates list of dicts
# topic0_batch5 = create_batch(openai_doc_info2, "0_Politicians' Response to Trump's Responsibility", "5_Trump's Involvement in the January 6th Attack")
# #creates dataframe from list of dicts
# topic0_batch5 = pd.DataFrame.from_records(topic0_batch5)

In [39]:
#topic0_batch5

In [40]:
# topic1_batch1 = create_batch(openai_doc_info2, "1_Trump's coup attempt with the help of Giuliani, Meadows, Stone, and others", "0_Politicians' Response to Trump's Responsibility")
# #creates dataframe from list of dicts
# topic1_batch1 = pd.DataFrame.from_records(topic1_batch1)

In [41]:
# topic1_batch2 = create_batch(openai_doc_info2, "1_Trump's coup attempt with the help of Giuliani, Meadows, Stone, and others", "2_Testifying Donald Trump under oath")
# #creates dataframe from list of dicts
# topic1_batch2 = pd.DataFrame.from_records(topic1_batch2)

In [42]:
# topic1_batch3 = create_batch(openai_doc_info2, "1_Trump's coup attempt with the help of Giuliani, Meadows, Stone, and others", "3_Mark Hamill's views on loving the country without creating violence by overthrowing democracy #January6thComm")
# #creates dataframe from list of dicts
# topic1_batch3 = pd.DataFrame.from_records(topic1_batch3)

In [44]:
# topic1_batch4 = create_batch(openai_doc_info2, "1_Trump's coup attempt with the help of Giuliani, Meadows, Stone, and others", "5_Trump's Involvement in the January 6th Attack")
# #creates dataframe from list of dicts
# topic1_batch4 = pd.DataFrame.from_records(topic1_batch4)

In [45]:
# topic1_batch5 = create_batch(openai_doc_info2, "1_Trump's coup attempt with the help of Giuliani, Meadows, Stone, and others", "4_Deployment of Acting Sec of Defense Chris Miller by National Guard denied")
# #creates dataframe from list of dicts
# topic1_batch5 = pd.DataFrame.from_records(topic1_batch5)

In [46]:
# topic2_batch1 = create_batch(openai_doc_info2, "2_Testifying Donald Trump under oath", "1_Trump's coup attempt with the help of Giuliani, Meadows, Stone, and others")
# #creates dataframe from list of dicts
# topic2_batch1 = pd.DataFrame.from_records(topic2_batch1)

In [47]:
# topic2_batch2 = create_batch(openai_doc_info2, "2_Testifying Donald Trump under oath", "0_Politicians' Response to Trump's Responsibility")
# #creates dataframe from list of dicts
# topic2_batch2 = pd.DataFrame.from_records(topic2_batch2)

In [48]:
# topic2_batch3 = create_batch(openai_doc_info2, "2_Testifying Donald Trump under oath", "3_Mark Hamill's views on loving the country without creating violence by overthrowing democracy #January6thComm")
# #creates dataframe from list of dicts
# topic2_batch3 = pd.DataFrame.from_records(topic2_batch3)

In [49]:
# topic2_batch4 = create_batch(openai_doc_info2, "2_Testifying Donald Trump under oath", "4_Deployment of Acting Sec of Defense Chris Miller by National Guard denied")
# #creates dataframe from list of dicts
# topic2_batch4 = pd.DataFrame.from_records(topic2_batch4)

In [50]:
# topic2_batch5 = create_batch(openai_doc_info2, "2_Testifying Donald Trump under oath", "5_Trump's Involvement in the January 6th Attack")
# #creates dataframe from list of dicts
# topic2_batch5 = pd.DataFrame.from_records(topic2_batch5)

In [51]:
# topic3_batch1 = create_batch(openai_doc_info2, "3_Mark Hamill's views on loving the country without creating violence by overthrowing democracy #January6thComm", "0_Politicians' Response to Trump's Responsibility")
# #creates dataframe from list of dicts
# topic3_batch1 = pd.DataFrame.from_records(topic3_batch1)

In [52]:
# topic3_batch2 = create_batch(openai_doc_info2, "3_Mark Hamill's views on loving the country without creating violence by overthrowing democracy #January6thComm", "1_Trump's coup attempt with the help of Giuliani, Meadows, Stone, and others")
# #creates dataframe from list of dicts
# topic3_batch2 = pd.DataFrame.from_records(topic3_batch2)

In [53]:
# topic3_batch3 = create_batch(openai_doc_info2, "3_Mark Hamill's views on loving the country without creating violence by overthrowing democracy #January6thComm", '2_Testifying Donald Trump under oath')
# #creates dataframe from list of dicts
# topic3_batch3 = pd.DataFrame.from_records(topic3_batch3)

In [54]:
# topic3_batch4 = create_batch(openai_doc_info2, "3_Mark Hamill's views on loving the country without creating violence by overthrowing democracy #January6thComm", "4_Deployment of Acting Sec of Defense Chris Miller by National Guard denied")
# #creates dataframe from list of dicts
# topic3_batch4 = pd.DataFrame.from_records(topic3_batch4)

In [55]:
# topic3_batch5 = create_batch(openai_doc_info2, "3_Mark Hamill's views on loving the country without creating violence by overthrowing democracy #January6thComm", "5_Trump's Involvement in the January 6th Attack")
# #creates dataframe from list of dicts
# topic3_batch5 = pd.DataFrame.from_records(topic3_batch5)

In [56]:
# topic4_batch1 = create_batch(openai_doc_info2, "4_Deployment of Acting Sec of Defense Chris Miller by National Guard denied", "0_Politicians' Response to Trump's Responsibility")
# #creates dataframe from list of dicts
# topic4_batch1 = pd.DataFrame.from_records(topic4_batch1)

In [57]:
# topic4_batch2 = create_batch(openai_doc_info2, "4_Deployment of Acting Sec of Defense Chris Miller by National Guard denied", "1_Trump's coup attempt with the help of Giuliani, Meadows, Stone, and others")
# #creates dataframe from list of dicts
# topic4_batch2 = pd.DataFrame.from_records(topic4_batch2)

In [58]:
# topic4_batch3 = create_batch(openai_doc_info2, "4_Deployment of Acting Sec of Defense Chris Miller by National Guard denied", "2_Testifying Donald Trump under oath")
# #creates dataframe from list of dicts
# topic4_batch3 = pd.DataFrame.from_records(topic4_batch3)

In [59]:
# topic4_batch4 = create_batch(openai_doc_info2, "4_Deployment of Acting Sec of Defense Chris Miller by National Guard denied", "3_Mark Hamill's views on loving the country without creating violence by overthrowing democracy #January6thComm")
# #creates dataframe from list of dicts
# topic4_batch4 = pd.DataFrame.from_records(topic4_batch4)

In [60]:
# topic4_batch5 = create_batch(openai_doc_info2, "4_Deployment of Acting Sec of Defense Chris Miller by National Guard denied", "5_Trump's Involvement in the January 6th Attack")
# #creates dataframe from list of dicts
# topic4_batch5 = pd.DataFrame.from_records(topic4_batch5)

In [61]:
# topic5_batch1 = create_batch(openai_doc_info2, "5_Trump's Involvement in the January 6th Attack", "0_Politicians' Response to Trump's Responsibility")
# #creates dataframe from list of dicts
# topic5_batch1 = pd.DataFrame.from_records(topic5_batch1)

In [62]:
# topic5_batch2 = create_batch(openai_doc_info2, "5_Trump's Involvement in the January 6th Attack", "1_Trump's coup attempt with the help of Giuliani, Meadows, Stone, and others")
# #creates dataframe from list of dicts
# topic5_batch2 = pd.DataFrame.from_records(topic5_batch2)

In [63]:
# topic5_batch3 = create_batch(openai_doc_info2, "5_Trump's Involvement in the January 6th Attack", "2_Testifying Donald Trump under oath")
# #creates dataframe from list of dicts
# topic5_batch3 = pd.DataFrame.from_records(topic5_batch3)

In [64]:
# topic5_batch4 = create_batch(openai_doc_info2, "5_Trump's Involvement in the January 6th Attack", "3_Mark Hamill's views on loving the country without creating violence by overthrowing democracy #January6thComm")
# #creates dataframe from list of dicts
# topic5_batch4 = pd.DataFrame.from_records(topic5_batch4)

In [65]:
# topic5_batch5 = create_batch(openai_doc_info2, "5_Trump's Involvement in the January 6th Attack", "4_Deployment of Acting Sec of Defense Chris Miller by National Guard denied")
# #creates dataframe from list of dicts
# topic5_batch5 = pd.DataFrame.from_records(topic5_batch5)

#### All Batches into Dataframe

In [66]:
# frames = [topic0_batch1,
# topic0_batch2,
# topic0_batch3,
# topic0_batch4,
# topic0_batch5,
# topic1_batch1,
# topic1_batch2,
# topic1_batch3,
# topic1_batch4,
# topic1_batch5,
# topic2_batch1,
# topic2_batch2,
# topic2_batch3,
# topic2_batch4,
# topic2_batch5,
# topic3_batch1,
# topic3_batch2,
# topic3_batch3,
# topic3_batch4,
# topic3_batch5,
# topic4_batch1,
# topic4_batch2,
# topic4_batch3,
# topic4_batch4,
# topic4_batch5,
# topic5_batch1,
# topic5_batch2,
# topic5_batch3,
# topic5_batch4,
# topic5_batch5
# ]

In [67]:
#result = pd.concat(frames)

In [68]:
# reset index
#result.reset_index(drop=True, inplace=True)

In [69]:
#result.head(20)

In [70]:
# #create column with intruder index
# result['intruder_index'] = None

In [71]:
# # Assign the row index to 'intruder_index' for the rows where 'intruder' is True
# result.loc[result['intruder'] == True, 'intruder_index'] = result.index[result['intruder'] == True]

In [72]:
#result

In [91]:
#result.to_csv('open_ai_bertopic_with_dupes.csv')