In [3]:
import nltk
from nltk import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from gensim import corpora, models

# Fetch every NLTK resource the code below relies on, not just 'omw-1.4':
#   - 'punkt' / 'punkt_tab': tokenizer models used by word_tokenize
#     (newer NLTK releases look for 'punkt_tab'; older ones for 'punkt')
#   - 'stopwords': corpus read by stopwords.words('english')
#   - 'wordnet' + 'omw-1.4': data used by WordNetLemmatizer
# nltk.download() skips packages that are already up to date.
for _resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet', 'omw-1.4'):
    nltk.download(_resource)


def preprocess_text(text):
    """Tokenize *text* into lowercase, lemmatized, non-stop-word tokens.

    Pipeline:
      1. Split the text with NLTK's ``word_tokenize``.
      2. Keep purely alphabetic tokens and lowercase them.
      3. Drop English stop words.
      4. Lemmatize the remainder with ``WordNetLemmatizer`` (called without a
         part-of-speech argument) and drop any lemma that is a stop word.

    Args:
        text: The raw message string to process.

    Returns:
        A list of processed tokens, in their original order.
    """
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    tokens = []
    for word in word_tokenize(text):
        if not word.isalpha():
            continue
        word = word.lower()
        # Filter stop words BEFORE lemmatizing: the lemmatizer rewrites some
        # of them into strings that are no longer in the stop list
        # (e.g. "was" -> "wa", "has" -> "ha"), which would let them through.
        if word in stop_words:
            continue
        lemma = lemmatizer.lemmatize(word)
        # Re-check afterwards so a lemma that collapses onto a stop word is
        # still removed, as the original post-lemmatization filter did.
        if lemma not in stop_words:
            tokens.append(lemma)

    return tokens

def cluster_dialogue(dialogue, num_topics=3, random_state=None):
    """Group the messages of a dialogue by their dominant LDA topic.

    Each message is preprocessed with ``preprocess_text``, converted to a
    bag-of-words vector, and an LDA model with ``num_topics`` topics is
    trained on the resulting corpus. Every message is then assigned to the
    topic with the highest probability for it.

    Args:
        dialogue: Iterable of message strings.
        num_topics: Number of LDA topics to fit.
        random_state: Optional seed forwarded to ``LdaModel``. Pass a fixed
            value to get reproducible clusters; the default ``None`` keeps
            the previous unseeded behavior.

    Returns:
        A dict mapping topic id to the list of original messages assigned to
        that topic, in dialogue order. Only topics that received at least
        one message appear. An empty dialogue yields an empty dict.
    """
    # Materialize once so any iterable (not only indexable sequences) works.
    messages = list(dialogue)
    if not messages:
        # Nothing to cluster; avoid training a model on an empty corpus.
        return {}

    texts = [preprocess_text(message) for message in messages]
    dictionary = corpora.Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]

    lda_model = models.LdaModel(
        corpus,
        num_topics=num_topics,
        id2word=dictionary,
        random_state=random_state,
    )

    topic_clusters = {}
    for message, bow in zip(messages, corpus):
        # Ask for the full distribution: indexing the model (lda_model[bow])
        # applies the model's minimum-probability cutoff, which can leave an
        # empty list when there are many topics and make max() fail.
        topic_weights = lda_model.get_document_topics(bow, minimum_probability=0.0)
        topic_id = max(topic_weights, key=lambda item: item[1])[0]
        topic_clusters.setdefault(topic_id, []).append(message)

    return topic_clusters

# Sample two-person dialogue used to demonstrate the clustering.
conversation = [
    "Person A: What's your favorite type of food?",
    "Person B: I love Italian pasta and pizza.",
    "Person A: I'm more into spicy Asian dishes.",
    "Person B: That sounds interesting. Do you enjoy cooking at home?",
    "Person A: Yes, I often try out new recipes I find online.",
    "Person B: Cooking can be so much fun and rewarding.",
]

# Split the dialogue into two subtopics and print each group of messages.
subtopic_clusters = cluster_dialogue(conversation, num_topics=2)
for topic_id, messages in subtopic_clusters.items():
    # Topic ids are zero-based; display them one-based. The header and the
    # messages are emitted one per line by a single print call.
    print(f"Subtopic {topic_id + 1}:", *messages, sep="\n")
    print("\n")


[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\drago\AppData\Roaming\nltk_data...


Subtopic 2:
Person A: What's your favorite type of food?
Person B: I love Italian pasta and pizza.
Person B: That sounds interesting. Do you enjoy cooking at home?
Person A: Yes, I often try out new recipes I find online.
Person B: Cooking can be so much fun and rewarding.


Subtopic 1:
Person A: I'm more into spicy Asian dishes.


