In [None]:
import sqlite3
from contextlib import closing

import pandas as pd
import matplotlib.pyplot as plt
from bertopic import BERTopic
from bertopic.representation import KeyBERTInspired
from bertopic.vectorizers import ClassTfidfTransformer
from transformers import AutoTokenizer, AutoModel
from transformers import pipeline
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from umap import UMAP
from hdbscan import HDBSCAN

# --- Configuration -----------------------------------------------------------
DB_PATH = '../EconomicsFinal.db'
TITLE_QUERY = "SELECT title FROM EconomicsFinal"
SEED = 42  # pins UMAP's random initialisation so topics are stable across re-runs

# --- Load titles -------------------------------------------------------------
# closing() releases the SQLite handle even if the query raises; using the
# connection itself as a context manager would only manage the transaction.
with closing(sqlite3.connect(DB_PATH)) as conn:
    titles_raw = pd.read_sql_query(TITLE_QUERY, conn)

# NULL titles cannot be embedded or tokenised, so drop them and reindex so that
# row i of `df` corresponds to document i handed to BERTopic below.
df = titles_raw.dropna(subset=['title']).reset_index(drop=True)
assert not df.empty, "no titles loaded from EconomicsFinal"

# --- Embedding model ---------------------------------------------------------
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# BERTopic's documented Hugging Face integration takes a `feature-extraction`
# pipeline. A bare AutoModel is not a documented `embedding_model` input, so it
# is wrapped here to make sure the BERT weights loaded above are the ones used.
embedding_model = pipeline("feature-extraction", model=model, tokenizer=tokenizer)

# --- BERTopic sub-models -----------------------------------------------------
# Dimensionality reduction; random_state makes the projection reproducible.
umap_model = UMAP(
    n_neighbors=15,
    n_components=5,
    min_dist=0.0,
    metric='cosine',
    random_state=SEED,
)

# Clustering of the reduced embeddings.
hdbscan_model = HDBSCAN(
    min_cluster_size=15,
    metric='euclidean',
    cluster_selection_method='eom',
    prediction_data=True,
)

# Topic terms are 2- and 3-word phrases with English stop words removed.
vectorizer_model = CountVectorizer(stop_words="english", ngram_range=(2, 3))

ctfidf_model = ClassTfidfTransformer()

representation_model = KeyBERTInspired()

topic_model = BERTopic(
    embedding_model=embedding_model,
    umap_model=umap_model,
    hdbscan_model=hdbscan_model,
    vectorizer_model=vectorizer_model,
    ctfidf_model=ctfidf_model,
    representation_model=representation_model,
)

# --- Fit ---------------------------------------------------------------------
# BERTopic documents its input as a list of strings, so pass one explicitly
# rather than a pandas Series.
docs = df['title'].astype(str).tolist()
topic_model.fit(docs)


In [None]:
# Bar-chart view of the fitted topics; kept as the cell's last expression so
# the returned figure is rendered as the cell output.
topic_model.visualize_barchart(
    width=500,
    height=400,
)

In [None]:
# Topic overview plot from the fitted model; left as the final expression so
# the notebook displays the returned figure.
topic_model.visualize_topics(
    width=1000,
    height=800,
)