In [14]:
from gensim import corpora, models
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
import nltk
import pandas as pd
nltk.download('stopwords')
nltk.download('wordnet')

# Sample data: list of documents
data = pd.read_csv('../5Shot_FullReviewsKeyphrases_OpenStorageUS.csv', usecols=[0], nrows=10, sep=';')
data = data.iloc[:, 0].astype(str).tolist()
data

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/davide.zanutto/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/davide.zanutto/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


['Extra shelves. These extra shelves make waisted space not waisted. ',
 'White cubes. This unit works great for TV components and radio speakers. Plus the baskets hide all accessories underneath the components. ',
 'Bookshelf. All my extra books fit on the shelves. The baskets help hold the extra stuff that won’t look nice exposed. ',
 'Kallax. Nice clean look, sturdy and easy to put together',
 'Great for my plants. Instructions can be confusing but the shelf is a great price and perfect for my plants.',
 'White Kallax Shelves. These shelves are perfect! I bought the 9 cubbie and the 4 cubbie and put them side by side in my closet. They look so nice and professional. They provide so much extra storage. I will probably buy more for the upstairs closet as well. I hired the Task Rabbit to put them together and that was a great experience. Thank you IKEA for such a nice product.',
 'Easy to assemble.. Easy to assemble.',
 'Great bookshelves. We bought 4 of these and made a library wall w

In [21]:
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()
texts = [[lemmatizer.lemmatize(word) for word in review.lower().split() if word not in stop_words]
         for review in data]

# Create a dictionary and corpus
dictionary = corpora.Dictionary(texts)
corpus = [dictionary.doc2bow(text) for text in texts]

# LDA Model
lda_model = models.LdaModel(corpus, num_topics=4, id2word=dictionary, passes=15, alpha='auto')

# Print topics for each review
for i, bow in enumerate(corpus):
    print(f"Review {i+1} Topics:")
    print(lda_model.get_document_topics(bow))

topics = lda_model.show_topics(formatted=True, num_topics=3, num_words=3)
for topic in topics:
    print(topic)

Review 1 Topics:
[(2, 0.9823776)]
Review 2 Topics:
[(3, 0.98693025)]
Review 3 Topics:
[(2, 0.98979574)]
Review 4 Topics:
[(0, 0.9756653), (2, 0.01309073)]
Review 5 Topics:
[(2, 0.011675678), (3, 0.97845507)]
Review 6 Topics:
[(1, 0.9942336)]
Review 7 Topics:
[(0, 0.012376811), (2, 0.9658054), (3, 0.012723947)]
Review 8 Topics:
[(2, 0.98902583)]
Review 9 Topics:
[(0, 0.9723118), (2, 0.014894151)]
Review 10 Topics:
[(0, 0.012376813), (2, 0.9658054), (3, 0.012723949)]
(1, '0.039*"put" + 0.039*"cubbie" + 0.039*"nice"')
(3, '0.072*"great" + 0.050*"plants." + 0.028*"shelf"')
(2, '0.066*"extra" + 0.035*"great" + 0.035*"easy"')
