In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from pathlib import Path
import os
import pyLDAvis
import pyLDAvis.lda_model
from utils.review_utils import find_reviews_by_keyword_list

In [2]:
# Locate the data directory next to the notebook's working directory and load
# one review set. The negative reviews are the active choice; uncomment one of
# the other lines to analyse the positive or mixed reviews instead.
NOTEBOOK_PATH = Path(os.path.abspath(''))
DATA_PATH = NOTEBOOK_PATH.parent.joinpath('data')
# df = pd.read_csv(DATA_PATH.joinpath('positive_reviews.csv'))
# df = pd.read_csv(DATA_PATH.joinpath('mixed_reviews.csv'))
df = pd.read_csv(DATA_PATH.joinpath('negative_reviews.csv'))

In [3]:
# Fetch the NLTK corpora used below: WordNet backs the lemmatizer and the
# stopwords corpus backs the stop-word filter.
nltk.download('wordnet')
nltk.download('stopwords')

numbers_re = re.compile(r'\d+')
punctuation_re = re.compile(r'[^\w\s]')
extra_spaces_re = re.compile(r'\s+')

lemmatizer = WordNetLemmatizer()

# Load the stop-word list once. Calling stopwords.words('english') inside the
# per-token loop re-evaluates it for every word and tests membership against a
# list; a frozenset built up front gives the same answers far more cheaply.
ENGLISH_STOPWORDS = frozenset(stopwords.words('english'))

def preprocess_text(text):
    """
    Cleans `text` by:
      - setting it to lower case,
      - removing numbers,
      - removing punctuation,
      - removing extra spaces,
      - removing stopwords, and
      - lemmatizing the remaining words.

    Returns the cleaned words joined by single spaces. Values that are not
    strings (e.g. NaN from an empty CSV cell) yield an empty string.
    """
    if not isinstance(text, str):
        return ''
    text = text.lower()
    text = numbers_re.sub(r'', text)
    text = punctuation_re.sub('', text)
    text = extra_spaces_re.sub(' ', text)
    # NOTE(review): apostrophes are stripped before the stop-word check, so
    # contractions such as "don't" become "dont" and appear to slip past the
    # filter ("dont" shows up among the topic top words) - confirm if intended.
    kept_words = [lemmatizer.lemmatize(word) for word in text.split() if word not in ENGLISH_STOPWORDS]
    return ' '.join(kept_words)

[nltk_data] Downloading package wordnet to /home/daniel/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [4]:
# Store a normalised copy of every review body alongside the raw text
df['cleaned_content'] = df['content'].map(preprocess_text)

In [5]:
# Build the bag-of-words matrix (one row per review, one column per term).
# Terms found in more than 95% of the reviews, or in fewer than 2 reviews,
# are left out of the vocabulary.
vectorizer = CountVectorizer(
    stop_words='english',
    min_df=2,
    max_df=0.95,
)
X = vectorizer.fit_transform(df['cleaned_content'])

# Fit the LDA topic model on the term counts (seeded for reproducibility)
NUMBER_OF_TOPICS = 3
lda_model = LatentDirichletAllocation(
    random_state=42,
    n_components=NUMBER_OF_TOPICS,
)
lda_model.fit(X)

In [6]:
# Assign each review to its most probable topic
df['topic'] = lda_model.transform(X).argmax(axis=1)

# Map topic numbers so that they start at 1 instead of 0 and are sorted in
# decreasing order of size (number of reviews assigned to the topic)
topic_sizes = df['topic'].value_counts().sort_values(ascending=False)

# value_counts() only lists topics that won at least one review. Rank any
# topic that never won after the populated ones, so that every component of
# the model has a label and print_top_words cannot hit a missing key.
ranked_topics = list(topic_sizes.index)
ranked_topics += [topic for topic in range(lda_model.n_components) if topic not in topic_sizes.index]

size_based_mapping = {old_topic: new_topic + 1 for new_topic, old_topic in enumerate(ranked_topics)}
df['topic'] = df['topic'].map(size_based_mapping)

def print_top_words(model, feature_names, n_top_words=10, topic_mapping=None):
    """
    Prints the top words in each topic.

    Parameters:
      - model: fitted topic model exposing `components_`, one row of term
        weights per topic.
      - feature_names: sequence of vocabulary terms, indexed like the columns
        of `model.components_`.
      - n_top_words: how many of the highest-weighted terms to print per topic.
      - topic_mapping: dict from the model's topic index to the label to print.
        When omitted, the notebook-level `size_based_mapping` is used.

    Topics are printed in increasing order of their mapped label. A topic index
    missing from the mapping raises KeyError.
    """
    if topic_mapping is None:
        # Keep the original behaviour: relabel with the size-based numbering
        # computed when the topics were assigned to the reviews.
        topic_mapping = size_based_mapping

    topics_map = {}
    for i, topic in enumerate(model.components_):
        # argsort is ascending, so reverse it and keep the first n_top_words
        top_indices = topic.argsort()[::-1][:n_top_words]
        topics_map[topic_mapping[i]] = " ".join(feature_names[j] for j in top_indices)

    for label in sorted(topics_map):
        print(f'Topic #{label}: {topics_map[label]}')

# Show the highest-weighted vocabulary terms of each fitted topic
print_top_words(model=lda_model, feature_names=vectorizer.get_feature_names_out())

Topic #1: app whatsapp update status video like dont new download time
Topic #2: whatsapp account number problem banned use team using help spam
Topic #3: message chat whatsapp phone app code issue send time working


In [7]:
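# Optional persistence of the labelled reviews and the fitted model (disabled).
# NOTE: `pickle` is not imported in the imports cell; add `import pickle` there
# before re-enabling the model dump below.
# df.to_csv('english_lda_whatsapp_reviews.csv')

# with open("lda.pkl", 'wb') as f:
#     pickle.dump(lda_model, f)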

In [8]:
# Preview the first rows, now including `cleaned_content` and `topic`
df.head(n=5)

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,appVersion,word_count,cleaned_content,topic
0,2d8753c1-ef1c-4467-9915-9f962e4ee309,A Google user,https://play-lh.googleusercontent.com/EGemoI2N...,"I call this app scammer depot, because 97% of ...",1,1,,2024-10-19 16:49:00,,,,18,call app scammer depot group scammer trying st...,3
1,f8a2dd3d-26c3-499c-bb12-29644c3af355,A Google user,https://play-lh.googleusercontent.com/EGemoI2N...,"due to * ask meta al"" instead of ""Search"" option",1,0,2.24.19.86,2024-10-19 16:46:36,,,2.24.19.86,10,due ask meta al instead search option,1
2,c11677f2-54f1-4624-a893-055f356ed57c,A Google user,https://play-lh.googleusercontent.com/EGemoI2N...,This app is really annoying how can u claim to...,1,0,,2024-10-19 16:20:21,,,,31,app really annoying u claim good u allow peopl...,1
3,65894e9e-ff90-4bf6-8be7-fb7e9e2baa15,A Google user,https://play-lh.googleusercontent.com/EGemoI2N...,Please provide sending option more than five t...,1,0,2.24.20.89,2024-10-19 16:17:58,,,2.24.20.89,24,please provide sending option five timesit unl...,3
4,9fa00d4b-b00d-4916-86d7-1dba09736439,A Google user,https://play-lh.googleusercontent.com/EGemoI2N...,I don't have meta ai yet and I have updated an...,1,0,2.24.20.89,2024-10-19 16:13:55,,,2.24.20.89,14,dont meta ai yet updated cleared storage,1


In [9]:
# Render the interactive pyLDAvis view of the fitted model inside the notebook.
# NOTE(review): pyLDAvis numbers the topics itself; its numbering may not match
# the size-based labels stored in df['topic'] - confirm before cross-referencing.
pyLDAvis.enable_notebook()
panel = pyLDAvis.lda_model.prepare(lda_model=lda_model, dtm=X, vectorizer=vectorizer)
pyLDAvis.display(panel)

In [10]:
# Container for hand-picked example reviews, keyed by theme
reviews = dict()

In [11]:
# Hand-picked backup-related reviews, grouped by sub-theme. Candidates were
# listed with:
# find_reviews_by_keyword_list(df, ['backup', 'back up', 'back-up'])
reviews['backup'] = {
    'custom_backup': [
        '''TOPIC 1 [2024-10-19]: please make option to what for backup. I dont want backup all things, i just want to backup the chat only.''',
        '''TOPIC 1 [2024-10-14]: WhatsApp please update the options for backup of only a specific chat....as we don't need all the chats to be backed up on our drive but we only need some specific ones....hope you get point and would consider it :)''',
    ],
    'incomplete_error_message': [
        '''TOPIC 2 [2024-10-06]: "Couldn't complete backup!" Getting this message every day while I have set it to "Never backup" or "Backup manually ". I DO NOT WANT backaup! Why I'm still getting this message?''',
        '''TOPIC 2 [2024-10-15]: Tried to restore backup in my new phone but failed. I mailed to concern and got just relaxtation NO SOLUTION. Backup is still available in my drive but nothing positive for me Now i think i lost my data''',
        '''TOPIC 2 [2024-09-15]: I can't backup my data it only reaches 80% and then it gets stuck''',
        '''TOPIC 2 [2024-10-02]: Since updating it has ANNOYING notifications on tablet that it could not complete backup. I set it to not back up, because my phone already does that, so why notify? Also it is trying to become more intrusive. Stick to the core secure messaging.''',
        '''TOPIC 2 [2024-08-20]: When I back up messages it's 100% completed but again it's getting reduced to 99% and getting increased''',
    ],
    'reloading_from_backup': [
        '''TOPIC 3 [2024-08-16]: Good app if you don't change your phone, cannot get any backup from the old phone....it sucks from this point''',
        '''TOPIC 2 [2024-10-12]: Nice app But I am really annoyed now I have tried to restore my backup but it's not working, I wish to see changes''',
        '''TOPIC 2 [2024-08-31]: I was unable to restore my backup from Google drive.''',
        '''TOPIC 2 [2024-10-14]: Unable to restore backup from Google drive. Old phone is backed up of Google drive I just cannot restore it. I can't do the transfer because it's the same number and one phone logs off the other. How can you transfer using a QR code. The instructions make it appear that you can have WhatsApp running on two devices with the same number but you can't. The other phone logs you out. I would like all the group chats on new phone, it can't get them. Disappointing. Very.''',
        '''TOPIC 2 [2024-10-11]: My whatsapp not restore pls help Sorry, we were unable to restore any of your message history backups.''',
    ],
}

In [12]:
# Hand-picked verification-related reviews. Candidates were listed with:
# find_reviews_by_keyword_list(df, ['verification'])
# NOTE(review): the sub-key 'custom_backup' looks copy-pasted from the backup
# cell - these reviews are about verification codes and two-step verification.
# Confirm, and rename it together with any code that reads this key.
reviews['verification'] = {
    'custom_backup': [
        '''TOPIC 2 [2024-07-24]: I have reinstalled Whatsapp, Unable to receive verification code from last two days, please help''',
        '''TOPIC 2 [2024-09-01]: I found this app not useful because i can't receive my verification code''',
        '''TOPIC 2 [2024-09-16]: I don't know why, all my security features are intact with a hard to guess two step verification, no one called to demand any code, neither did I tap on any suspicious link yet, my account got hacked. Review your security features. Apart from that WhatsApp is the best.''',
        '''TOPIC 2 [2024-08-20]: Everything is good except for the two step verification code. It does not load or even work even people I know have problem with it please help us to sort it out because the moment we try to enter the pin and save it it does not work it does not even bring email . For this reason I have decided to stop using Whatsapp so that my account won't be hacked by fraudsters''',
        '''TOPIC 2 [2024-09-22]: Someone can access my account from time to time, even blocked my friend and I cannot enable 2 step verification either. There's no Whatsapp web in my settings''',
    ],
}

In [13]:
# Hand-picked reviews for single-level themes. Unlike 'backup' and
# 'verification', each of these themes maps straight to a flat list of reviews
# rather than to a dict of sub-themes.
# NOTE(review): these read as positive reviews, presumably collected while the
# positive review set was loaded - confirm.
reviews.update({
    'video': [
        '''TOPIC 3 [2024-10-16]: It's beautiful app!!!and sharing photos, video, audio, location, document etc and they have audio call, video call They can see status of person etc.......''',
        '''TOPIC 3 [2024-10-16]: It's quite wonderful,I allow me to send messages either video, pictures, audio,to any destination üíØüíØüíØ''',
        '''TOPIC 1 [2024-10-15]: I love WhatsApp. This App is Really Amazing. Talking with this app brings a lot of peace. Because it's calling quality is very good and video calling quality is also very good. I hope this app will give me more great features in the future.''',
    ],
    'messages': [
        '''TOPIC 3 [2024-10-19]: So fast to send and receive messages Thank you for the service offered''',
        '''TOPIC 1 [2024-10-19]: I really love this app because it can keep your messages private''',
    ],
    'world': [
        '''TOPIC 1 [2024-10-19]: Best application for contact friends and relatives all over the world!''',
        '''TOPIC 1 [2024-10-17]: Always works well Ideal for keeping in touch with friends and family around the world''',
        '''TOPIC 1 [2024-10-16]: I love this app. It's a great app to communicate with family and friends throughout the world. It's fast and secure.''',
        '''TOPIC 3 [2024-10-15]: Amazing that I can take photos & texts, have a chat or make a video call and send to friends all over the world for free.''',
    ],
    'easy': [
        '''TOPIC 1 [2024-10-19]: This is a very nice app that makes communication easy and fast''',
        '''TOPIC 1 [2024-10-18]: One of the best communication apps I have ever used. Easy to communicate with my friends and family.''',
        '''TOPIC 1 [2024-10-16]: The app is very easy to use and great for keeping in touch with family''',
        '''TOPIC 3 [2024-10-16]: I appreciate its simplicity and user-friendly interface, making it easy to send messages, voice notes, and photos.''',
    ],
    'free': [
        '''TOPIC 1 [2024-10-18]: THIS app is so confidential and easy to use ..No advertisement is disturbing me''',
        '''TOPIC 1 [2024-10-05]: It's amazing, reliable and cheap. Easy to Use and understand.''',
        '''TOPIC 1 [2024-10-19]: Very good app for easy and free messaging, you don't need to pay for anything it's awesome for real''',
        '''TOPIC 1 [2024-10-04]: I'm so happy about this app because I can communicate with my friends and family even if I don't have money''',
    ],
    'fast': [
        '''TOPIC 3 [2024-10-19]: So fast to send and receive messages Thank you for the service offered''',
        '''TOPIC 1 [2024-10-18]: I love using WhatsApp Messenger because it is more fastest way to communicate''',
        '''TOPIC 3 [2024-10-15]: Extremely fast in sending messages and clarity of the video''',
    ],
    'connect': [
        '''TOPIC 3 [2024-10-09]: "WhatsApp has revolutionized the way I connect with loved ones and colleagues! Its seamless messaging, crystal-clear voice and video calls, and intuitive interface make communication a breeze. The end-to-end encryption ensures my conversations stay private. Sharing files, photos, and videos is effortless. WhatsApp's reliability and speed have made it my go-to messaging app. Plus, features like status updates and group chats keep me connected. 5/5 stars . Thank you meta .''',
        '''TOPIC 1 [2024-09-15]: I love this app and it makes me smile and laugh and be connected to the community''',
    ],
    'support': [
        '''TOPIC 2 [2024-09-21]: I've been using WhatsApp for years to stay connected with my friends and family, and it's always been a fantastic app. Recently, I had an issue with my account, and I was really worried. However, the support team was amazing. They responded quickly and resolved my problem in no time. I appreciate their help so much.''',
    ],
})

In [None]:
# Hand-picked reviews about bans, scams and hacked accounts, each theme mapped
# straight to a flat list of reviews.
reviews.update({
    'ban_without_warning': [
        '''TOPIC 2 [2024-10-09]: This is a good app having an advantages where it serves for the costumer the way to communicate with others either chatting,calling,or by videos..On the other hand the company of Whatsapp it self don't send any warning before doing a ban for the number.. Moreover it counts a duration to regive the taken number then it didn't.. This company don't have respect to religions,politicians, or ownopinions it just forbids the costumar from his number which reflect a very bad picture about..Frustrating!''',
        '''TOPIC 2 [2024-10-09]: I'm using Whatsapp more than five years but Whatsapp banned me from using Whatsapp without any warning. I was not using my whatsap and my wifi was on and I put my phone into a charge. When I came back my WhatsApp was banned.''',
        '''TOPIC 2 [2024-10-05]: Worst experience They suddenly banned your number. Might be my whatsapp hacked or used inappropriate by someone. WhatsApp should give first warning then they can ban number. I have c√†rry my number from last 10 year nd suddenly it banned. Very disappointing.''',
    ],
    'attempted_scam': [
        '''TOPIC 1 [2024-10-19]: I'm constantly added to communities that are trying to scam me. There's no way to disable this bad feature. I don't want communities at all I just want to talk to my family. A good chat app would project it's users from scammers.''',
        '''TOPIC 1 [2024-10-13]: This app is used by scammers to steal your information. Near every "ad" I have ghosted on Fakebook eventually leads to the scammer demanding me to download this app or "Telegram". That is proof enough of the scammers intent. BTW, Fakebook will NOT remove the scammers fake accounts! BEWARE!''',
        '''TOPIC 3 [2024-10-13]: There should seriously be an option to not receive messages from unknown numbers. Please start request before messaging & calling otherwise scammers and fraud people will continue misusing it & no one will feel like continuing WhatsApp''',
    ],
    'account_hacked': [
        '''TOPIC 3 [2024-10-16]: I get scammed through whatsapp its hacked and i can't use it anymore person is using that number sending messages asking for money''',
        '''TOPIC 1 [2024-10-15]: I really need you guys to do some thing for me asap. My account got hack by scammers and there asking people money with my name''',
        '''TOPIC 3 [2024-10-17]: If your account is hacked they don't even provide the support one of the worst application recently I have seen. I sent so many mails to the whatsapp team for support but they are giving worst support to us.''',
    ],
})

In [18]:
# List the reviews in `df` that match the keyword 'hack'; the helper (imported
# from utils.review_utils) appears to print one "TOPIC n [date]: text" line per
# matching review.
# NOTE(review): the line below looks like a scratch list of other keywords that
# were tried with this helper - confirm or remove.
# quality connect relative
find_reviews_by_keyword_list(df, ['hack'])

TOPIC 3 [2024-10-17]: If your account is hacked they don't even provide the support one of the worst application recently I have seen. I sent so many mails to the whatsapp team for support but they are giving worst support to us.
TOPIC 3 [2024-10-17]: One person hacked my whatsapp and message through what's app to my friends without my permission with this security level how can you people asked me to try whatsapp pay?
TOPIC 2 [2024-10-17]: You know this WhatsApp is about my number and my privacy if i need it be be my own account, so lately my account was hacked by someone, and i have been complaining about it, but after it's reviewed, then i text someone, i get banned, i have been banned 4 times now, with a mistake i know about it, someone hacked my account and it is getting me banned from my own WhatsApp account, seriously this is supposed protect me from those kind of pple. So please help me out to find who has got me banned.
TOPIC 2 [2024-10-17]: My account has been hacked what csn