In [2]:
import pandas as pd
import regex as re
from bs4 import BeautifulSoup
from bertopic.representation import KeyBERTInspired, PartOfSpeech, MaximalMarginalRelevance
from bertopic import BERTopic

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def preprocess_text(text):
    if text:
        # Remove HTML tags using BeautifulSoup
        soup = BeautifulSoup(text, "html.parser")
        text = soup.get_text(separator=" ")  # Extract text and replace tags with a space   
        # Replace multiple whitespace characters with a single space
        text = re.sub(r'\s+', ' ', text)
        # Strip leading and trailing whitespace
        text = text.strip()
    return text

ISSUE_DATA_PATH = "./Data/Issues.csv"
df = pd.read_csv(ISSUE_DATA_PATH)
df['Body'] = df['Body'].astype(str).fillna('')
df['combinedText'] = df['Title'] + ' ' + df['Body']

data = df['combinedText'].apply(preprocess_text).tolist()

In [10]:
representation_model = [KeyBERTInspired(), MaximalMarginalRelevance(diversity=0.3)]
topic_model = BERTopic(representation_model=representation_model, nr_topics=30)
topics, probs = topic_model.fit_transform(data)

In [11]:
topic_model.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,5285,-1_bug_error_failed_issue,"[bug, error, failed, issue, same, fix, message...",[Fix policy condition calculation ### Descript...
1,0,2364,0_bug_chrome_issue_wallet,"[bug, chrome, issue, wallet, failed, error, br...",[(BUG) [RN] Update failed. Invalid value at 'f...
2,1,1105,1_bug_bot_sell_kelp2179004,"[bug, bot, sell, kelp2179004, kelp, transactio...","[[3] If short of assets, the error message sho..."
3,2,957,2_torprocessmanager_wasabi_wasabisynchronizer_tor,"[torprocessmanager, wasabi, wasabisynchronizer...",[GUI freeze after GeneralSocksServerFailure in...
4,3,942,3_token_tokens_colony_behavior,"[token, tokens, colony, behavior, behaviour, c...","[Vote is not being detected, allowing me to vo..."
5,4,840,4_bug_yarn_npm_issue,"[bug, yarn, npm, issue, tests, error, failed, ...",[Wallet config: set config to inactive when co...
6,5,716,5_bug_behaviour_issue_expected,"[bug, behaviour, issue, expected, fix, behavio...",[B **Describe the bug** A clear and concise de...
7,6,597,6_nan_error_balance_bug,"[nan, error, balance, bug, asset, missing, bit...",[Token Balance NaN Token balance should displa...
8,7,258,7_bug_scenes_loading_preview,"[bug, scenes, loading, preview, scene, issue, ...",[Remove noise from scene console Whenever you ...
9,8,199,8_comments_comment_empty_notification,"[comments, comment, empty, notification, notif...",[Submitting a new comment should show it on Fe...


In [13]:
topic_model.get_document_info(data)

Unnamed: 0,Document,Topic,Name,Representation,Representative_Docs,Top_n_words,Probability,Representative_document
0,"""RangeError: Maximum call stack size exceeded""...",1,1_bug_bot_sell_kelp2179004,"[bug, bot, sell, kelp2179004, kelp, transactio...","[[3] If short of assets, the error message sho...",bug - bot - sell - kelp2179004 - kelp - transa...,1.000000,False
1,gekko not re-trying rejected trades **Note: th...,1,1_bug_bot_sell_kelp2179004,"[bug, bot, sell, kelp2179004, kelp, transactio...","[[3] If short of assets, the error message sho...",bug - bot - sell - kelp2179004 - kelp - transa...,0.302839,False
2,gekko instances (load/workers/loadCandles/chil...,2,2_torprocessmanager_wasabi_wasabisynchronizer_tor,"[torprocessmanager, wasabi, wasabisynchronizer...",[GUI freeze after GeneralSocksServerFailure in...,torprocessmanager - wasabi - wasabisynchronize...,1.000000,False
3,"Kraken, order not moving, fill not detected **...",1,1_bug_bot_sell_kelp2179004,"[bug, bot, sell, kelp2179004, kelp, transactio...","[[3] If short of assets, the error message sho...",bug - bot - sell - kelp2179004 - kelp - transa...,0.234561,False
4,Gekko Broker passed an `undefined` orderSummar...,1,1_bug_bot_sell_kelp2179004,"[bug, bot, sell, kelp2179004, kelp, transactio...","[[3] If short of assets, the error message sho...",bug - bot - sell - kelp2179004 - kelp - transa...,0.384661,False
...,...,...,...,...,...,...,...,...
14443,Event BLOCK_HEIGHT_CHANGED provides non-contin...,-1,-1_bug_error_failed_issue,"[bug, error, failed, issue, same, fix, message...",[Fix policy condition calculation ### Descript...,bug - error - failed - issue - same - fix - me...,0.000000,False
14444,deploy() does not work ## Steps to reproduce: ...,4,4_bug_yarn_npm_issue,"[bug, yarn, npm, issue, tests, error, failed, ...",[Wallet config: set config to inactive when co...,bug - yarn - npm - issue - tests - error - fai...,1.000000,False
14445,Two invokeRead method invocation will mix the ...,2,2_torprocessmanager_wasabi_wasabisynchronizer_tor,"[torprocessmanager, wasabi, wasabisynchronizer...",[GUI freeze after GeneralSocksServerFailure in...,torprocessmanager - wasabi - wasabisynchronize...,0.710369,False
14446,getAccount Alias returns undefined getAccount ...,4,4_bug_yarn_npm_issue,"[bug, yarn, npm, issue, tests, error, failed, ...",[Wallet config: set config to inactive when co...,bug - yarn - npm - issue - tests - error - fai...,1.000000,False
