# Using BERTopic for topic modelling

### Data Preprocessing

In [1]:
import pandas as pd
import numpy as np
import re
from cleantext import clean
from langdetect import detect
# For cleantext library, first need to: pip install clean-text[gpl]
# For langdetect library, first need to: pip install langdetect

In [2]:
from bertopic import BERTopic
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
from datetime import datetime
import pickle

In [9]:
data = pd.read_csv("Twitter Jan Mar_cleaned.csv")

In [10]:
def preprocess(x):
    x = x.lower()
    x = re.sub(r'\s', ' ', x, flags=re.MULTILINE)
    x = re.sub('[!"$%&\'()*+,-./:;<=>?^_`{|}~]', '', x, flags=re.MULTILINE)
    x = re.sub(r'http\S+', '', x, flags=re.MULTILINE)
    x = re.sub(r'@\S+', '', x, flags=re.MULTILINE)
    x = re.sub(r'#\S+', '', x, flags=re.MULTILINE)
    x = clean(x, no_emoji=True)
    x = re.sub(r'\s*\w*gpt\w*\s*', ' ', x, flags=re.MULTILINE)
    x = re.sub(r'\s*\w*ai\w*\s*', ' ', x, flags=re.MULTILINE)
    x = x.strip()
    return x

def detect_lang(tweet, target_lang):
    try:
        result_lang = detect(tweet)
        if result_lang == target_lang:
            return True
        else:
            return False
    except:
        return False

In [11]:
data['content_processed'] = data['content'].map(preprocess)

In [12]:
# Filter out tweets that are too short (30 chars or less)
data = data[data['content_processed'].str.len() > 30]
data

Unnamed: 0,date,id,content,username,like_count,retweet_count,content_processed
0,2023-03-29 22:58:21+00:00,1.641210e+18,"Free AI marketing and automation tools, strate...",RealProfitPros,0,0,free marketing and automation tools strategies...
2,2023-03-29 22:57:53+00:00,1.641210e+18,https://t.co/FjJSprt0te - Chat with any PDF!\n...,yjleon1976,0,0,chat with any pdf check out how this new quick...
3,2023-03-29 22:57:52+00:00,1.641210e+18,"AI muses: ""In the court of life, we must all f...",ChatGPT_Thinks,0,0,muses in the court of life we must all face th...
4,2023-03-29 22:57:26+00:00,1.641210e+18,Most people haven't heard of Chat GPT yet.\nFi...,nikocosmonaut,0,0,most people havent heard of chat yet first eli...
5,2023-03-29 22:57:20+00:00,1.641210e+18,@nytimes No! Chat Gpt has been putting togethe...,cordydbarb,0,0,no chat has been putting together amazing recipes
...,...,...,...,...,...,...,...
499997,2023-01-04 07:18:08+00:00,1.610540e+18,@GoogleAI #LAMDA Versus @OpenAI #ChatGPT ?! Wh...,Pup_In_Cup,1,0,versus who cares lamda isnt is whats my reason...
499998,2023-01-04 07:17:50+00:00,1.610540e+18,#ChatGPT \n\nSo much #Censorship.\n\nNever tru...,TryingToOffend,2,0,so much never trust a system you dont admin
499999,2023-01-04 07:17:20+00:00,1.610540e+18,all my twitter feed is about ChatGPT and @Open...,mcp350,3,1,all my twitter feed is about and lol
500000,2023-01-04 07:17:08+00:00,1.610540e+18,I'm quite amazed by Chat GPT. A really promisi...,manumurali369,1,0,im quite amazed by chat a really promising per...


In [13]:
# Filter out tweets that are non-English
mask = data['content_processed'].apply(lambda twt: detect_lang(twt, target_lang='en'))
data = data[mask]
data

Unnamed: 0,date,id,content,username,like_count,retweet_count,content_processed
0,2023-03-29 22:58:21+00:00,1.641210e+18,"Free AI marketing and automation tools, strate...",RealProfitPros,0,0,free marketing and automation tools strategies...
2,2023-03-29 22:57:53+00:00,1.641210e+18,https://t.co/FjJSprt0te - Chat with any PDF!\n...,yjleon1976,0,0,chat with any pdf check out how this new quick...
3,2023-03-29 22:57:52+00:00,1.641210e+18,"AI muses: ""In the court of life, we must all f...",ChatGPT_Thinks,0,0,muses in the court of life we must all face th...
4,2023-03-29 22:57:26+00:00,1.641210e+18,Most people haven't heard of Chat GPT yet.\nFi...,nikocosmonaut,0,0,most people havent heard of chat yet first eli...
5,2023-03-29 22:57:20+00:00,1.641210e+18,@nytimes No! Chat Gpt has been putting togethe...,cordydbarb,0,0,no chat has been putting together amazing recipes
...,...,...,...,...,...,...,...
499996,2023-01-04 07:18:50+00:00,1.610540e+18,It’s not what you say\n(or write)\n\nIt’s how ...,StuLierich,1,0,it's not what you say or write it's how you sa...
499997,2023-01-04 07:18:08+00:00,1.610540e+18,@GoogleAI #LAMDA Versus @OpenAI #ChatGPT ?! Wh...,Pup_In_Cup,1,0,versus who cares lamda isnt is whats my reason...
499999,2023-01-04 07:17:20+00:00,1.610540e+18,all my twitter feed is about ChatGPT and @Open...,mcp350,3,1,all my twitter feed is about and lol
500000,2023-01-04 07:17:08+00:00,1.610540e+18,I'm quite amazed by Chat GPT. A really promisi...,manumurali369,1,0,im quite amazed by chat a really promising per...


In [14]:
# Output to csv file
data.to_csv('Tweets_for_Topic_Modelling.csv')

### Topic Modelling

In [3]:
data = pd.read_csv('Tweets_for_Topic_Modelling.csv')
data = data.drop(data.columns[0], axis=1)
data

Unnamed: 0,date,id,content,username,like_count,retweet_count,content_processed
0,2023-03-29 22:58:21+00:00,1.641210e+18,"Free AI marketing and automation tools, strate...",RealProfitPros,0,0,free marketing and automation tools strategies...
1,2023-03-29 22:57:53+00:00,1.641210e+18,https://t.co/FjJSprt0te - Chat with any PDF!\n...,yjleon1976,0,0,chat with any pdf check out how this new quick...
2,2023-03-29 22:57:52+00:00,1.641210e+18,"AI muses: ""In the court of life, we must all f...",ChatGPT_Thinks,0,0,muses in the court of life we must all face th...
3,2023-03-29 22:57:26+00:00,1.641210e+18,Most people haven't heard of Chat GPT yet.\nFi...,nikocosmonaut,0,0,most people havent heard of chat yet first eli...
4,2023-03-29 22:57:20+00:00,1.641210e+18,@nytimes No! Chat Gpt has been putting togethe...,cordydbarb,0,0,no chat has been putting together amazing recipes
...,...,...,...,...,...,...,...
418953,2023-01-04 07:18:50+00:00,1.610540e+18,It’s not what you say\n(or write)\n\nIt’s how ...,StuLierich,1,0,it's not what you say or write it's how you sa...
418954,2023-01-04 07:18:08+00:00,1.610540e+18,@GoogleAI #LAMDA Versus @OpenAI #ChatGPT ?! Wh...,Pup_In_Cup,1,0,versus who cares lamda isnt is whats my reason...
418955,2023-01-04 07:17:20+00:00,1.610540e+18,all my twitter feed is about ChatGPT and @Open...,mcp350,3,1,all my twitter feed is about and lol
418956,2023-01-04 07:17:08+00:00,1.610540e+18,I'm quite amazed by Chat GPT. A really promisi...,manumurali369,1,0,im quite amazed by chat a really promising per...


In [4]:
tweets = list(data['content_processed'])
tweets[0]

'free marketing and automation tools strategies and collaboration launching new week'

In [23]:
stopwords = list(ENGLISH_STOP_WORDS)
stopwords.extend(['chat', 'open', 'artificial', 'intelligence'])
stopwords[:5]

['neither', 'yourselves', 'next', 'wherein', 'against']

#### Run BERTopic model

In [24]:
start_time = datetime.now()
print("Cell run start time:", start_time)
print("----------------------------------")
vectorizer_model = CountVectorizer(ngram_range=(1,2), stop_words=stopwords, min_df=0.05)
topic_model = BERTopic(vectorizer_model=vectorizer_model, min_topic_size=50, verbose=True, low_memory=True)
topics, proba = topic_model.fit_transform(tweets)

end_time = datetime.now()
print("Cell run end time:", end_time)

Cell run start time: 2023-06-19 15:37:10.271438
----------------------------------


Batches:   0%|          | 0/13093 [00:00<?, ?it/s]

2023-06-19 16:55:24,577 - BERTopic - Transformed documents to Embeddings
2023-06-19 17:28:04,117 - BERTopic - Reduced dimensionality
2023-06-19 17:29:39,803 - BERTopic - Clustered reduced embeddings


Cell run end time: 2023-06-19 17:30:12.187178


In [25]:
# Pickle topic model
pickle.dump(topic_model, open('topic_model.sav','wb'))

  self._set_arrayXarray(i, j, x)


In [5]:
# Load topic model from pickle file
topic_model = pickle.load(open('topic_model.sav','rb'))

In [5]:
top_topics = topic_model.get_topic_info()
top_topics.head(21)

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,197491,-1_like_amp_human_just,"[like, amp, human, just, work, using, use, new...",[as a former english teacher i can't help but ...
1,0,6399,0_twitter_tweet_tweets_social media,"[twitter, tweet, tweets, social media, social,...","[everyone on twitter in the last few day, why ..."
2,1,5910,1_language_language model_language models_larg...,"[language, language model, language models, la...",[and other large language models could be fake...
3,2,4119,2_capacity_lol_shit_tried,"[capacity, lol, shit, tried, fucking, chats, i...","[chat has been at capacity for days now, i cha..."
4,3,3426,3_essay_school_essays_students,"[essay, school, essays, students, teacher, tea...",[published on youtube how do i write essay in ...
5,4,3100,4_amp_tech_discuss_impact,"[amp, tech, discuss, impact, innovation, poten...",[our newsletter is out with amp on the new amp...
6,5,3053,5_song_music_rap_asked write,"[song, music, rap, asked write, style, write, ...",[i asked to write a rap song about and in the ...
7,6,2914,6_code_coding_programmers_developers,"[code, coding, programmers, developers, progra...",[cannot replace software engineers or develope...
8,7,2887,7_education_classroom_teachers_educators,"[education, classroom, teachers, educators, te...",[is going to revolutionize education for both ...
9,8,2694,8_chatbots_chatbot_bots_bot,"[chatbots, chatbot, bots, bot, customer, custo...","[chatbot out there its even better than the, 5..."


In [6]:
top_topics.head(31)

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,197491,-1_like_amp_human_just,"[like, amp, human, just, work, using, use, new...",[as a former english teacher i can't help but ...
1,0,6399,0_twitter_tweet_tweets_social media,"[twitter, tweet, tweets, social media, social,...","[everyone on twitter in the last few day, why ..."
2,1,5910,1_language_language model_language models_larg...,"[language, language model, language models, la...",[and other large language models could be fake...
3,2,4119,2_capacity_lol_shit_tried,"[capacity, lol, shit, tried, fucking, chats, i...","[chat has been at capacity for days now, i cha..."
4,3,3426,3_essay_school_essays_students,"[essay, school, essays, students, teacher, tea...",[published on youtube how do i write essay in ...
5,4,3100,4_amp_tech_discuss_impact,"[amp, tech, discuss, impact, innovation, poten...",[our newsletter is out with amp on the new amp...
6,5,3053,5_song_music_rap_asked write,"[song, music, rap, asked write, style, write, ...",[i asked to write a rap song about and in the ...
7,6,2914,6_code_coding_programmers_developers,"[code, coding, programmers, developers, progra...",[cannot replace software engineers or develope...
8,7,2887,7_education_classroom_teachers_educators,"[education, classroom, teachers, educators, te...",[is going to revolutionize education for both ...
9,8,2694,8_chatbots_chatbot_bots_bot,"[chatbots, chatbot, bots, bot, customer, custo...","[chatbot out there its even better than the, 5..."


In [7]:
for i in range(1, 31):
    print("(", i-1, ")")
    topic = top_topics["Representation"][i]
    print("Topic words:", topic)
    docs = top_topics["Representative_Docs"][i]
    print("Topic docs:", docs)
    print("----------------------------------------------------------")

( 0 )
Topic words: ['twitter', 'tweet', 'tweets', 'social media', 'social', 'media', 'followers', 'retweet', 'account', 'posts']
Topic docs: ['everyone on twitter in the last few day', 'why is on twitter an answer by itself', "how about using to factcheck politician's tweets"]
----------------------------------------------------------
( 1 )
Topic words: ['language', 'language model', 'language models', 'large language', 'natural language', 'language processing', 'models', 'natural', 'large', 'processing']
Topic docs: ['and other large language models could be fake', 'the next generation of large language models', 'after large language models now its time for large world models']
----------------------------------------------------------
( 2 )
Topic words: ['capacity', 'lol', 'shit', 'tried', 'fucking', 'chats', 'im', 'yall', 'log', 'guys']
Topic docs: ['chat has been at capacity for days now', 'i chat plus for it to still be at capacity sometimes lol', 'i cant even use chat because its

In [6]:
df = topic_model.get_document_info(tweets, df=data)
df

Unnamed: 0,date,id,content,username,like_count,retweet_count,content_processed,Document,Topic,Name,Representation,Representative_Docs,Top_n_words,Probability,Representative_document
0,2023-03-29 22:58:21+00:00,1.641210e+18,"Free AI marketing and automation tools, strate...",RealProfitPros,0,0,free marketing and automation tools strategies...,free marketing and automation tools strategies...,-1,-1_like_amp_human_just,"[like, amp, human, just, work, using, use, new...",[as a former english teacher i can't help but ...,like - amp - human - just - work - using - use...,0.000000,False
1,2023-03-29 22:57:53+00:00,1.641210e+18,https://t.co/FjJSprt0te - Chat with any PDF!\n...,yjleon1976,0,0,chat with any pdf check out how this new quick...,chat with any pdf check out how this new quick...,350,350_pdf_files_upload_document,"[pdf, files, upload, document, summarize, file...",[impressive pdf tool it is free at least for n...,pdf - files - upload - document - summarize - ...,1.000000,False
2,2023-03-29 22:57:52+00:00,1.641210e+18,"AI muses: ""In the court of life, we must all f...",ChatGPT_Thinks,0,0,muses in the court of life we must all face th...,muses in the court of life we must all face th...,23,23_legal_lawyers_law_lawyer,"[legal, lawyers, law, lawyer, court, judge, ju...",[legal creativity legal issues often require c...,legal - lawyers - law - lawyer - court - judge...,0.653853,False
3,2023-03-29 22:57:26+00:00,1.641210e+18,Most people haven't heard of Chat GPT yet.\nFi...,nikocosmonaut,0,0,most people havent heard of chat yet first eli...,most people havent heard of chat yet first eli...,-1,-1_like_amp_human_just,"[like, amp, human, just, work, using, use, new...",[as a former english teacher i can't help but ...,like - amp - human - just - work - using - use...,0.000000,False
4,2023-03-29 22:57:20+00:00,1.641210e+18,@nytimes No! Chat Gpt has been putting togethe...,cordydbarb,0,0,no chat has been putting together amazing recipes,no chat has been putting together amazing recipes,27,27_cooking_food_dinner_cook,"[cooking, food, dinner, cook, eat, plan, lunch...",[i my not all world know what cooking on do yo...,cooking - food - dinner - cook - eat - plan - ...,1.000000,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
418953,2023-01-04 07:18:50+00:00,1.610540e+18,It’s not what you say\n(or write)\n\nIt’s how ...,StuLierich,1,0,it's not what you say or write it's how you sa...,it's not what you say or write it's how you sa...,-1,-1_like_amp_human_just,"[like, amp, human, just, work, using, use, new...",[as a former english teacher i can't help but ...,like - amp - human - just - work - using - use...,0.000000,False
418954,2023-01-04 07:18:08+00:00,1.610540e+18,@GoogleAI #LAMDA Versus @OpenAI #ChatGPT ?! Wh...,Pup_In_Cup,1,0,versus who cares lamda isnt is whats my reason...,versus who cares lamda isnt is whats my reason...,415,415_lamda_googles_google_sentient,"[lamda, googles, google, sentient, vs, dialogu...",[lamda google's breakthrough conversation tech...,lamda - googles - google - sentient - vs - dia...,1.000000,False
418955,2023-01-04 07:17:20+00:00,1.610540e+18,all my twitter feed is about ChatGPT and @Open...,mcp350,3,1,all my twitter feed is about and lol,all my twitter feed is about and lol,0,0_twitter_tweet_tweets_social media,"[twitter, tweet, tweets, social media, social,...","[everyone on twitter in the last few day, why ...",twitter - tweet - tweets - social media - soci...,1.000000,False
418956,2023-01-04 07:17:08+00:00,1.610540e+18,I'm quite amazed by Chat GPT. A really promisi...,manumurali369,1,0,im quite amazed by chat a really promising per...,im quite amazed by chat a really promising per...,15,15_google_google search_search_use google,"[google, google search, search, use google, se...","[will chat take over google via danielle, will...",google - google search - search - use google -...,0.958818,False


In [12]:
tweet_with_topics = df[['Topic', 'username', 'content', 'date']]
tweet_with_topics['date'] = pd.to_datetime(tweet_with_topics['date'], format='%Y-%m-%d')
tweet_with_topics = tweet_with_topics.loc[tweet_with_topics['date'].dt.month == 3]
tweet_with_topics

Unnamed: 0,Topic,username,content,date
0,-1,RealProfitPros,"Free AI marketing and automation tools, strate...",2023-03-29 22:58:21+00:00
1,350,yjleon1976,https://t.co/FjJSprt0te - Chat with any PDF!\n...,2023-03-29 22:57:53+00:00
2,23,ChatGPT_Thinks,"AI muses: ""In the court of life, we must all f...",2023-03-29 22:57:52+00:00
3,-1,nikocosmonaut,Most people haven't heard of Chat GPT yet.\nFi...,2023-03-29 22:57:26+00:00
4,27,cordydbarb,@nytimes No! Chat Gpt has been putting togethe...,2023-03-29 22:57:20+00:00
...,...,...,...,...
161213,9,EONRealityInc,The best part about AI in healthcare is its po...,2023-03-01 00:00:18+00:00
161214,1,speckproducts,"🤖 Hey there! It's #ChatGPT, your favorite AI l...",2023-03-01 00:00:16+00:00
161215,117,louismyers110,Why was the John green book so sad? \nBecause ...,2023-03-01 00:00:04+00:00
161216,-1,jj_devbot_,Noun: victor\n\nA victor is someone who has co...,2023-03-01 00:00:02+00:00


In [11]:
tweet_with_topics.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 418958 entries, 0 to 418957
Data columns (total 5 columns):
 #   Column      Non-Null Count   Dtype              
---  ------      --------------   -----              
 0   Topic       418958 non-null  int64              
 1   username    418958 non-null  object             
 2   content     418958 non-null  object             
 3   date        418958 non-null  object             
 4   tweet_date  418958 non-null  datetime64[ns, UTC]
dtypes: datetime64[ns, UTC](1), int64(1), object(3)
memory usage: 19.2+ MB


In [13]:
topics_list = [0, 1, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 22, 23, 26, 27, 28]
df_filtered = tweet_with_topics[tweet_with_topics['Topic'].isin(topics_list)]
df_filtered

Unnamed: 0,Topic,username,content,date
2,23,ChatGPT_Thinks,"AI muses: ""In the court of life, we must all f...",2023-03-29 22:57:52+00:00
4,27,cordydbarb,@nytimes No! Chat Gpt has been putting togethe...,2023-03-29 22:57:20+00:00
10,9,puppetsucks,@ThatOuternaut HEY CHAT GPT I DON'T GOT TIME F...,2023-03-29 22:56:30+00:00
22,0,0xPromptcraft,Looking for new ways to improve your tweets? \...,2023-03-29 22:53:00+00:00
26,8,BadrBellaj,Run a Gpt 4 chat-based chatbot in your laptop\...,2023-03-29 22:51:56+00:00
...,...,...,...,...
161177,26,bkbooks_au,Just tried #ChatGPT. We are both impressed and...,2023-03-01 00:15:53+00:00
161196,1,Ultraman8521023,@K_Karin_KK Try DeepL or ChatGPT. I personally...,2023-03-01 00:06:01+00:00
161210,27,cdossenb,"Started playing with #ChatGPT today.... ""I wor...",2023-03-01 00:00:45+00:00
161213,9,EONRealityInc,The best part about AI in healthcare is its po...,2023-03-01 00:00:18+00:00


In [15]:
col = 'Topic'
conditions = [df_filtered[col]==0, df_filtered[col].isin([3, 7]), df_filtered[col]==14,
              df_filtered[col]==6, df_filtered[col]==1, df_filtered[col]==13, df_filtered[col].isin([8, 18]),
              df_filtered[col].isin([10, 15, 17]), df_filtered[col]==5, df_filtered[col]==16, df_filtered[col]==9,
              df_filtered[col]==11, df_filtered[col]==12, df_filtered[col]==22, df_filtered[col]==23,
              df_filtered[col]==20, df_filtered[col]==27, df_filtered[col]==26, df_filtered[col]==28]
choices_names = ['Social Media', 'Education', 'Math', 'Programming/Coding', 'Natural Language Processing (NLP)',
           'Artificial Intelligence (AI)', 'Chatbots & Generative AI', 'Search Engines', 'Music', 'Poetry',
           'Medical/Healthcare', 'Workforce', 'Subscription Services', 'Prompt Engineering', 'Legal', 'Politics',
          'Culinary', 'Content Creation', 'Business & Marketing']

df_filtered['Topic names'] = np.select(conditions, choices_names, default='None')
df_filtered['Topic index'] = np.select(conditions, np.arange(1,20), default='None')
df_filtered = df_filtered[['Topic index', 'Topic names', 'username', 'content']]
df_filtered

Unnamed: 0,Topic index,Topic names,username,content
2,15,Legal,ChatGPT_Thinks,"AI muses: ""In the court of life, we must all f..."
4,17,Culinary,cordydbarb,@nytimes No! Chat Gpt has been putting togethe...
10,11,Medical/Healthcare,puppetsucks,@ThatOuternaut HEY CHAT GPT I DON'T GOT TIME F...
22,1,Social Media,0xPromptcraft,Looking for new ways to improve your tweets? \...
26,7,Chatbots & Generative AI,BadrBellaj,Run a Gpt 4 chat-based chatbot in your laptop\...
...,...,...,...,...
161177,18,Content Creation,bkbooks_au,Just tried #ChatGPT. We are both impressed and...
161196,5,Natural Language Processing (NLP),Ultraman8521023,@K_Karin_KK Try DeepL or ChatGPT. I personally...
161210,17,Culinary,cdossenb,"Started playing with #ChatGPT today.... ""I wor..."
161213,11,Medical/Healthcare,EONRealityInc,The best part about AI in healthcare is its po...


### Data checking

In [16]:
# Numnber of unique topics
df_filtered['Topic names'].nunique()

19

In [17]:
# Number of unique users
df_filtered['username'].nunique()

16508

In [18]:
# Distribution of topics
df_filtered['Topic names'].value_counts()

Social Media                         2546
Natural Language Processing (NLP)    2306
Education                            2040
Chatbots & Generative AI             1724
Search Engines                       1519
Music                                1086
Medical/Healthcare                   1043
Programming/Coding                    994
Workforce                             967
Artificial Intelligence (AI)          769
Subscription Services                 724
Prompt Engineering                    708
Math                                  703
Legal                                 670
Poetry                                666
Culinary                              615
Business & Marketing                  566
Content Creation                      442
Politics                              405
Name: Topic names, dtype: int64

In [19]:
output_df = df_filtered[['Topic names','Topic index', 'username']]
output_df

Unnamed: 0,Topic names,Topic index,username
2,Legal,15,ChatGPT_Thinks
4,Culinary,17,cordydbarb
10,Medical/Healthcare,11,puppetsucks
22,Social Media,1,0xPromptcraft
26,Chatbots & Generative AI,7,BadrBellaj
...,...,...,...
161177,Content Creation,18,bkbooks_au
161196,Natural Language Processing (NLP),5,Ultraman8521023
161210,Culinary,17,cdossenb
161213,Medical/Healthcare,11,EONRealityInc


In [20]:
# Output to csv file
output_df.to_csv('Tweets with topic labels.csv', index=False)