In [34]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import pickle
from pathlib import Path
import os
import pyLDAvis
import pyLDAvis.lda_model

In [35]:
# Resolve the data directory as a sibling of the directory the notebook runs
# in, then read the mixed reviews dataset from it.
# NOTE(review): the df.head() output shows an 'Unnamed: 0' column, so the CSV
# probably carries a saved index; consider index_col=0 — confirm against the file.
NOTEBOOK_PATH = Path.cwd()
DATA_PATH = NOTEBOOK_PATH.parent / 'data'
df = pd.read_csv(DATA_PATH.joinpath('mixed_reviews.csv'))

In [36]:
# Fetch the NLTK resources the cleaning step relies on: WordNet backs the
# lemmatizer, and the stopwords corpus backs stopwords.words('english').
# Without the second download a fresh environment fails with LookupError the
# first time preprocess_text runs.
nltk.download('wordnet')
nltk.download('stopwords')

# Patterns are compiled once here and reused for every review.
numbers_re = re.compile(r'\d+')          # runs of digits
punctuation_re = re.compile(r'[^\w\s]')  # anything that is not a word character or whitespace
extra_spaces_re = re.compile(r'\s+')     # runs of whitespace

lemmatizer = WordNetLemmatizer()

_ENGLISH_STOPWORDS = None  # filled in on first use by _english_stopwords()


def _english_stopwords():
    """
    Returns the NLTK English stopwords as a frozenset, loading them only once.
    """
    global _ENGLISH_STOPWORDS
    if _ENGLISH_STOPWORDS is None:
        # stopwords.words() re-reads the corpus and returns a list on every
        # call, so keep a single set around for constant-time membership tests.
        _ENGLISH_STOPWORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOPWORDS


def preprocess_text(text):
    """
    Cleans `text` by:
      - setting it to lower case,
      - removing numbers,
      - removing punctuation,
      - collapsing extra spaces,
      - removing stopwords, and
      - lemmatizing the remaining words.

    Stopwords are filtered on the raw (un-lemmatized) tokens; only the words
    that survive are lemmatized.

    Args:
        text: The review text to clean (a string).

    Returns:
        The cleaned words joined by single spaces.
    """
    text = text.lower()
    text = numbers_re.sub(r'', text)
    # NOTE(review): punctuation is stripped before the stopword check, so
    # contractions such as "don't" become "dont" and are not filtered out
    # ("dont" shows up among the printed topic words) — confirm this is intended.
    text = punctuation_re.sub('', text)
    text = extra_spaces_re.sub(' ', text)
    stop_words = _english_stopwords()
    return ' '.join(lemmatizer.lemmatize(word) for word in text.split() if word not in stop_words)

[nltk_data] Downloading package wordnet to /home/daniel/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [37]:
# Run every raw review through preprocess_text and store the result next to
# the original text in a new column
df['cleaned_content'] = df['content'].map(preprocess_text)

In [38]:
# Turn the cleaned reviews into a document-term count matrix. Terms found in
# more than 95% of the reviews or in fewer than 2 reviews are dropped, as are
# scikit-learn's built-in English stop words.
vectorizer = CountVectorizer(
    stop_words='english',
    min_df=2,
    max_df=0.95,
)
X = vectorizer.fit_transform(df['cleaned_content'])

# Fit the LDA topic model on the counts; the fixed random_state keeps the
# discovered topics reproducible between runs.
NUMBER_OF_TOPICS = 3
lda_model = LatentDirichletAllocation(
    n_components=NUMBER_OF_TOPICS,
    random_state=42,
)
lda_model.fit(X)

In [54]:
# Assign each review to its most probable topic
df['topic'] = lda_model.transform(X).argmax(axis=1)

# Relabel topics as 1..N in decreasing order of how many reviews were assigned
# to them (1 = largest topic).
# Reindexing over every topic of the model keeps topics that no review was
# assigned to in the mapping (with a count of 0), so later lookups by original
# topic index cannot raise KeyError. The stable sort makes the order of
# equally sized topics deterministic (lower original index first).
topic_sizes = (
    df['topic']
    .value_counts()
    .reindex(range(lda_model.n_components), fill_value=0)
    .sort_values(ascending=False, kind='stable')
)
size_based_mapping = {old_topic: new_topic + 1 for new_topic, old_topic in enumerate(topic_sizes.index)}
df['topic'] = df['topic'].map(size_based_mapping)

def print_top_words(model, feature_names, n_top_words=10, topic_mapping=None):
    """
    Prints the top words in each topic, one line per topic, ordered by the
    topic's display label.

    Args:
        model: Fitted topic model exposing `components_` (one row of word
            weights per topic).
        feature_names: Sequence mapping a column index of `components_` to
            its word.
        n_top_words: How many of the highest-weighted words to print per topic.
        topic_mapping: Optional dict from the model's topic index to the label
            to display. Defaults to the notebook-level `size_based_mapping`.
            Topics missing from the mapping are given the next unused labels
            (in topic-index order) instead of raising KeyError.
    """
    if topic_mapping is None:
        topic_mapping = size_based_mapping

    # Work on a copy so the caller's mapping is never modified.
    labels = dict(topic_mapping)
    next_label = max(labels.values(), default=0) + 1

    topics_map = {}
    for i, topic in enumerate(model.components_):
        if i not in labels:
            labels[i] = next_label
            next_label += 1
        # Indices of the highest-weighted words, largest weight first.
        top_indices = topic.argsort()[::-1][:n_top_words]
        topics_map[labels[i]] = " ".join(feature_names[j] for j in top_indices)

    for label in sorted(topics_map):
        print(f'Topic #{label}: {topics_map[label]}')

# Print top words in each topic, labelled with the size-ranked topic numbers
# from size_based_mapping (the same labels stored in df['topic'])
print_top_words(lda_model, vectorizer.get_feature_names_out())

Topic #1: status app chat update good like video whatsapp option message
Topic #2: whatsapp app account problem phone message number help issue dont
Topic #3: whatsapp update app ai meta like version new updated feature


In [40]:
# df.to_csv('english_lda_whatsapp_reviews.csv')

# with open("lda.pkl", 'wb') as f:
#     pickle.dump(lda_model, f)

In [41]:
# Preview the first rows, including the cleaned_content and topic columns
df.head()

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,appVersion,word_count,cleaned_content,topic
0,9a3397a7-f577-4d1a-b996-b906552bc5df,A Google user,https://play-lh.googleusercontent.com/EGemoI2N...,Recently I've been having a lot of problems ge...,4,0,2.24.20.89,2024-10-19 16:37:37,,,2.24.20.89,43,recently ive lot problem getting photo video p...,0
1,73706729-cf15-4cf5-8c59-0966dbe770c4,A Google user,https://play-lh.googleusercontent.com/EGemoI2N...,Suddenly can use Lock chats. The pull down so ...,4,0,2.24.20.89,2024-10-19 16:36:30,,,2.24.20.89,36,suddenly use lock chat pull view isnt working ...,1
2,c6585da6-3aeb-43b5-b584-752b97f6a9dc,A Google user,https://play-lh.googleusercontent.com/EGemoI2N...,I start to enjoy your new features but i don't...,2,0,,2024-10-19 16:23:27,,,,20,start enjoy new feature dont know blocked time...,2
3,adb01e41-8f3c-4e9d-85e6-10b7cb85d09d,A Google user,https://play-lh.googleusercontent.com/EGemoI2N...,"Am tired of updating it time to time,i think t...",3,0,2.24.13.77,2024-10-19 16:22:02,,,2.24.13.77,17,tired updating time timei think version whatsa...,0
4,585bfbdd-5e5a-471c-9717-de24d49f80b1,A Google user,https://play-lh.googleusercontent.com/EGemoI2N...,Good to use but some times its not downloading...,3,0,2.24.20.89,2024-10-19 16:22:00,,,2.24.20.89,17,good use time downloading file pic video fix,1


In [42]:
# Visualizing the LDA model
# Build the pyLDAvis panel from the fitted model, the document-term matrix and
# the vectorizer, then render it inline in the notebook.
# NOTE(review): the topic numbers shown in the panel come from pyLDAvis and
# are not derived from size_based_mapping — confirm they line up before
# comparing them with df['topic'].
pyLDAvis.enable_notebook()
panel = pyLDAvis.lda_model.prepare(lda_model, X, vectorizer)
pyLDAvis.display(panel)

In [76]:
def find_reviews_by_keyword_list(df, keyword_list):
    """
    Prints every review whose content contains at least one of the keywords,
    followed by the number of matches per topic and the overall total.

    Matching is a case-insensitive substring test: both the review text and
    the keywords are lower-cased before comparing. Rows whose content is not
    a string (e.g. missing values) are skipped.

    Args:
        df: DataFrame with a 'content' column (review text) and a 'topic'
            column (topic label of each review).
        keyword_list: Iterable of keyword strings to search for.
    """
    # The content is lower-cased below, so the keywords must be as well;
    # otherwise a keyword containing any capital letter could never match.
    keywords = [keyword.lower() for keyword in keyword_list]

    total = 0
    topic_total_map = {}
    # Iterate the two columns directly instead of building a row with
    # df.iloc[i] several times per review.
    for content, topic in zip(df['content'], df['topic']):
        if not isinstance(content, str):
            continue
        lowered = content.lower()
        if not any(keyword in lowered for keyword in keywords):
            continue

        total += 1
        topic_total_map[topic] = topic_total_map.get(topic, 0) + 1
        print(f"TOPIC {topic}: {content}")

    print('===== Totals from each topic =====')
    for topic in sorted(topic_total_map):
        print(f'Topic {topic}: {topic_total_map[topic]}', end=', ')
    print(f'Total: {total}, all else 0')

def find_reviews_by_keyword(df, keyword):
    """
    Single-keyword convenience wrapper: searches `df` for `keyword` by
    delegating to find_reviews_by_keyword_list with a one-element list.
    """
    keywords = [keyword]
    find_reviews_by_keyword_list(df, keywords)

In [77]:
# Reviews mentioning backups, covering the three spellings listed below
find_reviews_by_keyword_list(df, ['backup', 'back up', 'back-up'])

TOPIC 1: please make option to what for backup. I dont want backup all things, i just want to backup the chat only.
TOPIC 3: I have chat backups turned off, but as a new annoyance, I get frequent notifications, saying "couldn't complete backup"... Of course the darn thing couldn't complete the backup, it is not supposed to even try! That's a new bug (I hope it is a bug, and not intentional) that has been bothering me for two or three weeks now. Please, test your software before you publish it.
TOPIC 2: I am facing problem my WhatsApp account has stopped backup for last 2 week even when I change to another gmail (15gb full space) it still shows same message "unable to backup"....need support
TOPIC 2: After chats backup, then also some files are not working
TOPIC 2: Hello whatsapp , it's a very big problem I have to share you there is some issue in WhatsApp backup, if we forgot password and we don't have whatsapp also then we don't have any second chance to recover our data.if we recover

In [81]:
# Reviews mentioning verification (e.g. verification codes, two-step verification)
find_reviews_by_keyword_list(df, ['verification'])

TOPIC 2: My whatsapp is not working. two-step verification is not coming. Please help me
TOPIC 2: Please my account is being logged out continously and I always write a review and it will still log out again. I am a Nigerian student who is far away from home and I need my whatsapp account for online classes, after it has been given back it will always say you account has been flagged due to technical issues,the team should please review my whatsapp account and fix the problem. Now I can't even get my verification code due to several trials.
TOPIC 2: Request Dear WhatsApp team recently I'm facing a problem to received OTP I am trying to get verification code to login my WhatsApp but I didn't receive any code please fix this unburn my account as soon as possible
TOPIC 2: I haven't received any verification code for 2 days now both on call and SMS
TOPIC 2: I cant even receive my verification code after waiting for a seven good hour.
TOPIC 2: I can't enable my step two- verification. Why s

In [83]:
# Reviews containing the word "option"
find_reviews_by_keyword_list(df, ['option'])

TOPIC 1: please make option to what for backup. I dont want backup all things, i just want to backup the chat only.
TOPIC 1: There's a feature I'd love to see added to WhatsApp: the ability to download view once pictures. Right now, if you want to save one of these photos, you have to go through a third-party website. This extra step is a hassle, and it might not always be safe. Wouldn't it be more convenient if WhatsApp itself offered a download option for view once pictures?" 🌼
TOPIC 3: I love using whatsapp but I didn't get the chat lock option in this update
TOPIC 1: As soon as I start using internet My wife think I am using WhatsApp Messenger , Please make whatsapp application airplane mode option, double click system allows me wife that I am always online , but I am using YouTube or other app for entertainment or time' pass
TOPIC 2: The app is incredible but in my friends Whatsapp they have AI option but i don't have i update my WhatsApp but nothing happened please Tell me what t