# Topic modelling using Bertopic

In [65]:
from bertopic import BERTopic
import json
import umap
import pandas as pd
import hdbscan

In [66]:
def convert_to_string_iterables(docs, how="All"):
    if (how=="All"):    
        messagesList = []
        for conversation in docs:    
            messages=""
            for chat in conversation['chat_history']:
                if 'message' in chat:
                    messages=messages+chat['message']
                if 'agent_message' in chat:
                    messages=messages+chat['agent_message']
                    # messages.concat(chat['agent_message'])
            messagesList.append(messages)
        return(messagesList)
    else:
        return(None)




In [78]:
with open("data/chat_history.json","r", encoding="utf-8") as f:
    docs=json.load(f)

In [79]:
string_list=convert_to_string_iterables(docs, "All")

In [80]:
len(string_list)

8

In [81]:
# Customize the UMAP model
umap_model = umap.UMAP(n_neighbors=5, n_components=2, min_dist=0.0, metric='cosine')
hdbscan_model = hdbscan.HDBSCAN(min_cluster_size=3, min_samples=2, metric='euclidean')
topic_model= BERTopic(embedding_model="all-MiniLM-L6-v2", umap_model=umap_model,hdbscan_model=hdbscan_model)

topics, probs= topic_model.fit_transform(string_list)

In [82]:
topic_model.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,8,-1_you_have_the_discount,"[you, have, the, discount, can, we, to, for, h...","[Hi, do you have stylish blue jacketsHello! Ye..."


In [83]:
topic_model.get_topic(-1)

[('you', 0.1934321572215864),
 ('have', 0.14497046875388903),
 ('the', 0.1311781219463537),
 ('discount', 0.12395904384554161),
 ('can', 0.1164991285372758),
 ('we', 0.0924136922429985),
 ('to', 0.0924136922429985),
 ('for', 0.0924136922429985),
 ('hello', 0.0836988609067404),
 ('do', 0.0836988609067404)]

In [84]:
topic_model.get_representative_docs(0)

In [85]:
df=pd.DataFrame({"topic":topics,"docs":string_list})

In [86]:
df

Unnamed: 0,topic,docs
0,-1,"Hi, I'm looking for a summer dress.Do you have..."
1,-1,"Hello, do you have size 8 in black jeans?Hi! L..."
2,-1,"Hi, can you recommend a jacket for cold weathe..."
3,-1,"Hello, I'm looking for a gift for my wife. Any..."
4,-1,"Hi, do you have any promotions or discounts av..."
5,-1,"Hi, do you have Winter jackets ?Hello! Yes, we..."
6,-1,Do you have some red jackets for winter Hello!...
7,-1,"Hi, do you have stylish blue jacketsHello! Yes..."


In [None]:
topic_model.visualize_barchart()