In [1]:
import pandas as pd
from textblob import TextBlob
import gensim
from gensim import corpora
from gensim.models import LdaModel
import pyLDAvis.gensim_models
import spacy
nlp = spacy.load('en_core_web_sm')

  import sre_constants


In [2]:
def get_sentiment(review):
    testimonial = TextBlob(review)
    polarity = testimonial.sentiment.polarity
    # Classify the sentiment as positive, negative, or neutral based on the polarity score
    if polarity > 0:
        return 'positive'
    elif polarity < 0:
        return 'negative'
    else:
        return 'neutral'

In [3]:
def get_proccesed_doc(docs):
    doc = nlp(docs)
    doc1 = [token for token in doc if not token.is_stop]
    doc2 = [token.text for token in doc1 if token.is_alpha]
    final_text = ' '.join(doc2)
    return doc2

In [4]:
def get_proccesed_text(docs):
    doc = nlp(docs)
    doc1 = [token for token in doc if not token.is_stop]
    doc2 = [token.text for token in doc1 if token.is_alpha]
    final_text = ' '.join(doc2)
    return final_text

In [5]:
df  = pd.read_csv('final_data.csv')

In [6]:
df['Sentiment'] = df['Reviews'].apply(get_sentiment)

In [7]:
df.head()

Unnamed: 0,Ratings,Reviews,Sentiment
0,2,"Since the last 2-3 updates, the application wo...",positive
1,4,There's no feature to share my screen. Update ...,negative
2,2,"The app tries to improve, but takes a step bac...",positive
3,1,After the last update it's a nightmare! I am t...,negative
4,2,Switching phones is not easy with this app. I ...,negative


In [8]:
groups = df.groupby('Sentiment')

In [9]:
negative_comments = groups.get_group('negative')
positive_comments = groups.get_group('positive')

In [10]:
negative_comments.head()

Unnamed: 0,Ratings,Reviews,Sentiment
1,4,There's no feature to share my screen. Update ...,negative
3,1,After the last update it's a nightmare! I am t...,negative
4,2,Switching phones is not easy with this app. I ...,negative
7,1,Exceptionally horrible! Forces close. Freezes....,negative
11,1,It has been extremely buggy for the last few m...,negative


In [11]:
negative_comments_list = negative_comments['Reviews'].to_list()
positive_comments_list = positive_comments['Reviews'].to_list()

In [12]:
negative_document = list(map(get_proccesed_doc, negative_comments_list))
processed_negative_comments = list(map(get_proccesed_text, negative_comments_list))

In [13]:
positive_document = list(map(get_proccesed_doc, positive_comments_list))
processed_positive_comments = list(map(get_proccesed_text, positive_comments_list))

In [14]:
positive_dic = corpora.Dictionary(positive_document)
negative_dic = corpora.Dictionary(negative_document)

In [15]:
negative_corpus = [negative_dic.doc2bow(doc) for doc in negative_document]
positive_corpus = [positive_dic.doc2bow(doc) for doc in positive_document]

In [17]:
num_topic = 20
positive_model = LdaModel(positive_corpus, num_topics=num_topic, id2word=positive_dic, passes=25)
negative_model = LdaModel(negative_corpus, num_topics=num_topic, id2word=negative_dic, passes=25)

In [18]:
# Print the top 10 topics and their associated words
top_topics = positive_model.print_topics(num_topics=10, num_words=10)

for topic in top_topics:
    print(topic)

(6, '0.070*"chat" + 0.060*"group" + 0.050*"chats" + 0.045*"feature" + 0.027*"groups" + 0.024*"lock" + 0.023*"option" + 0.022*"messages" + 0.018*"like" + 0.016*"app"')
(13, '0.017*"privacy" + 0.013*"access" + 0.013*"successful" + 0.012*"mistake" + 0.009*"tiring" + 0.008*"app" + 0.008*"specific" + 0.008*"FB" + 0.008*"looks" + 0.007*"statuses"')
(5, '0.043*"app" + 0.039*"WhatsApp" + 0.030*"new" + 0.024*"features" + 0.023*"update" + 0.022*"channel" + 0.022*"like" + 0.017*"channels" + 0.016*"feature" + 0.014*"friends"')
(16, '0.052*"like" + 0.014*"add" + 0.013*"message" + 0.013*"send" + 0.013*"whatsapp" + 0.012*"want" + 0.010*"messages" + 0.010*"read" + 0.009*"check" + 0.008*"function"')
(7, '0.027*"new" + 0.020*"emoji" + 0.016*"update" + 0.016*"app" + 0.015*"old" + 0.015*"keyboard" + 0.014*"text" + 0.012*"like" + 0.010*"version" + 0.010*"chat"')
(10, '0.069*"video" + 0.056*"voice" + 0.035*"screen" + 0.027*"calls" + 0.025*"audio" + 0.017*"note" + 0.014*"sharing" + 0.011*"problem" + 0.010*"c

In [19]:
# Print the top 10 topics and their associated words
top_topics = negative_model.print_topics(num_topics=10, num_words=10)

for topic in top_topics:
    print(topic)

(9, '0.043*"apps" + 0.037*"issues" + 0.032*"screen" + 0.031*"connection" + 0.028*"calls" + 0.028*"try" + 0.025*"video" + 0.025*"phone" + 0.025*"WhatsApp" + 0.024*"update"')
(18, '0.040*"going" + 0.040*"chats" + 0.039*"typing" + 0.039*"like" + 0.038*"app" + 0.037*"page" + 0.036*"long" + 0.033*"search" + 0.031*"results" + 0.029*"group"')
(19, '0.075*"horrible" + 0.050*"stuffs" + 0.050*"skipping" + 0.049*"happend" + 0.017*"things" + 0.010*"day" + 0.010*"n" + 0.008*"gif" + 0.008*"Bring" + 0.008*"u"')
(17, '0.052*"account" + 0.047*"WhatsApp" + 0.042*"number" + 0.038*"issue" + 0.027*"app" + 0.025*"whatsapp" + 0.022*"new" + 0.021*"blocked" + 0.018*"time" + 0.017*"showed"')
(15, '0.086*"WhatsApp" + 0.056*"channels" + 0.051*"like" + 0.036*"worst" + 0.034*"new" + 0.034*"update" + 0.030*"app" + 0.028*"seriously" + 0.028*"confused" + 0.022*"version"')
(3, '0.097*"quality" + 0.056*"video" + 0.056*"status" + 0.046*"sent" + 0.041*"videos" + 0.038*"files" + 0.037*"needs" + 0.033*"upload" + 0.030*"bad"

In [34]:
import pyLDAvis.gensim

In [35]:
import joblib

In [42]:
pyLDAvis.enable_notebook()
positive_lda_visualization = pyLDAvis.gensim.prepare(positive_model, positive_corpus, positive_dic)
negative_lda_visualization = pyLDAvis.gensim.prepare(negative_model, negative_corpus, negative_dic)

In [43]:
positive_lda_visualization

In [44]:
negative_lda_visualization

In [47]:
import re

# Initialize an empty set to store unique words
all_words = set()

# Extract words from each topic string and add them to the set
for topic in top_topics:
    words_with_weights = re.findall(r'"(.*?)"', topic[1])  # Extract words within double quotes
    for word in words_with_weights:
        all_words.add(word)

# Display all the unique words
print(all_words)


{'contacts', 'worst', 'u', 'load', 'typing', 'quality', 'star', 'weeks', 'bad', 'help', 'WhatsApp', 'going', 'blocked', 'day', 'apps', 'issues', 'page', 'lot', 'upload', 'suddenly', 'things', 'needs', 'screen', 'time', 'showed', 'status', 'happend', 'loading', 'stickers', 'Hated', 'account', 'group', 'experience', 'confused', 'try', 'phone', 'end', 'information', 'search', 'files', 'lost', 'chats', 'results', 'gif', 'code', 'emojis', 'like', 'n', 'access', 'sticker', 'keyboard', 'makes', 'number', 'forever', 'skipping', 'single', 'verification', 'channels', 'annoying', 'Bring', 'videos', 'horrible', 'connection', 'picture', 'update', 'calls', 'want', 'stuffs', 'chat', 'whatsapp', 'emoji', 'app', 'profile', 'seriously', 'frustrating', 'long', 'version', 'video', 'issue', 'new', 'fine', 'sent', 'taking'}


In [45]:
top_topics

[(9,
  '0.043*"apps" + 0.037*"issues" + 0.032*"screen" + 0.031*"connection" + 0.028*"calls" + 0.028*"try" + 0.025*"video" + 0.025*"phone" + 0.025*"WhatsApp" + 0.024*"update"'),
 (18,
  '0.040*"going" + 0.040*"chats" + 0.039*"typing" + 0.039*"like" + 0.038*"app" + 0.037*"page" + 0.036*"long" + 0.033*"search" + 0.031*"results" + 0.029*"group"'),
 (19,
  '0.075*"horrible" + 0.050*"stuffs" + 0.050*"skipping" + 0.049*"happend" + 0.017*"things" + 0.010*"day" + 0.010*"n" + 0.008*"gif" + 0.008*"Bring" + 0.008*"u"'),
 (17,
  '0.052*"account" + 0.047*"WhatsApp" + 0.042*"number" + 0.038*"issue" + 0.027*"app" + 0.025*"whatsapp" + 0.022*"new" + 0.021*"blocked" + 0.018*"time" + 0.017*"showed"'),
 (15,
  '0.086*"WhatsApp" + 0.056*"channels" + 0.051*"like" + 0.036*"worst" + 0.034*"new" + 0.034*"update" + 0.030*"app" + 0.028*"seriously" + 0.028*"confused" + 0.022*"version"'),
 (3,
  '0.097*"quality" + 0.056*"video" + 0.056*"status" + 0.046*"sent" + 0.041*"videos" + 0.038*"files" + 0.037*"needs" + 0.033