In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer

# Toy corpus of ten app reviews, held in a single 'cleaned_content' column.
# `data` is the raw column mapping; `df_reviews` is the DataFrame built from it.
data = dict(
    cleaned_content=[
        "The app is great and very user-friendly.",
        "I had issues with logging in. Customer support is not helpful.",
        "Amazing features and easy to navigate.",
        "The app crashes frequently. Very frustrating experience.",
        "Good app but needs improvement in performance.",
        "Excellent service and very intuitive design.",
        "Terrible experience. The app is slow and unresponsive.",
        "Fantastic app! I love the new update.",
        "The app is okay but lacks some important features.",
        "Great app for banking on the go. Highly recommended.",
    ],
)

# Each dict key becomes a column; rows get the default 0..9 integer index.
df_reviews = pd.DataFrame.from_dict(data)

# Build the bag-of-words representation of the reviews.
# English stop words are removed, terms occurring in more than 90% of the
# documents are ignored (max_df), and min_df=1 keeps every remaining term.
vectorizer = CountVectorizer(
    stop_words='english',
    min_df=1,
    max_df=0.9,
)
# Learn the vocabulary and produce the document-term count matrix in one pass.
doc_term_matrix = vectorizer.fit_transform(df_reviews.loc[:, 'cleaned_content'])


In [3]:
from sklearn.decomposition import LatentDirichletAllocation

# Fit a Latent Dirichlet Allocation topic model on the document-term matrix.
n_topics = 3  # number of topics the model is asked to extract
LDA = LatentDirichletAllocation(
    random_state=42,  # fixed seed so repeated runs give the same fit
    n_components=n_topics,
)
LDA.fit(doc_term_matrix)


In [7]:
# Infer the per-document topic distribution from the fitted model
# (one row per review, one column per topic).
topic_assignments = LDA.transform(doc_term_matrix)

# Label each review with its dominant topic: the column index holding the
# largest weight in that review's row.
assigned_topics = topic_assignments.argmax(1)

# Store the label next to the review text and show the resulting column.
df_reviews['topic'] = assigned_topics
print(df_reviews.loc[:, 'topic'])

0    0
1    1
2    1
3    2
4    1
5    2
6    0
7    1
8    2
9    1
Name: topic, dtype: int64


In [9]:
# Bucket the reviews by the topic label assigned to them.
grouped_reviews = df_reviews.groupby('topic')

# Show each bucket: a header line, the review texts, then a blank-line spacer.
# A single print with sep="\n" emits the same text as three separate prints.
for topic, group in grouped_reviews:
    print(f"Topic {topic}:", group['cleaned_content'], "\n", sep="\n")


Topic 0:
0             The app is great and very user-friendly.
6    Terrible experience. The app is slow and unres...
Name: cleaned_content, dtype: object


Topic 1:
1    I had issues with logging in. Customer support...
2               Amazing features and easy to navigate.
4       Good app but needs improvement in performance.
7                Fantastic app! I love the new update.
9    Great app for banking on the go. Highly recomm...
Name: cleaned_content, dtype: object


Topic 2:
3    The app crashes frequently. Very frustrating e...
5         Excellent service and very intuitive design.
8    The app is okay but lacks some important featu...
Name: cleaned_content, dtype: object


