In [1]:
from google_play_scraper import reviews, Sort
from textblob import TextBlob
import matplotlib.pyplot as plt
import pandas as pd
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
import seaborn as sns

In [2]:
# Fetch the newest English-language reviews (US store) for the app id below.
# `continuation_token` is returned alongside the batch but is not used by any
# later cell visible in this notebook.
result, continuation_token = reviews(
    app_id='com.ashwhale.sepush.eskom',
    lang='en',
    country='us',
    sort=Sort.NEWEST,
)

In [3]:
# Peek at the first few raw review dicts only. Each entry carries the
# reviewer's userName and userImage URL, so dumping the whole list would
# persist personal data (and a very long output) in the saved notebook.
result[:3]

[{'reviewId': '81139d58-02e0-4545-8d4e-3a6ab72de837',
  'userName': 'Pule Matlho',
  'userImage': 'https://play-lh.googleusercontent.com/a-/ALV-UjVpSPO7VHipJ-Olg51OtMkLw5jcgaPVf5fw3sVBhdWUKQkMHS-l',
  'content': 'Exceptional experience with updates. Always keeping me updated about power outages and stability. Best App by far!',
  'score': 5,
  'thumbsUpCount': 0,
  'reviewCreatedVersion': None,
  'at': datetime.datetime(2024, 9, 12, 17, 19, 59),
  'replyContent': None,
  'repliedAt': None,
  'appVersion': None},
 {'reviewId': '52e3a65a-9c40-4a3a-8f21-f0dada755b6f',
  'userName': 'Isaac Malau',
  'userImage': 'https://play-lh.googleusercontent.com/a-/ALV-UjUlcO7MhqITSUv3I-faoGISzSssZdY-pt0BDXS23mhqLFcHmspl',
  'content': 'Great',
  'score': 5,
  'thumbsUpCount': 0,
  'reviewCreatedVersion': '4.3.0',
  'at': datetime.datetime(2024, 9, 11, 23, 6, 56),
  'replyContent': None,
  'repliedAt': None,
  'appVersion': '4.3.0'},
 {'reviewId': 'ca8d7492-6a44-4ba1-a280-23145b227341',
  'userName': 

In [5]:
# Keep only the review text. Slicing (instead of indexing 0..99) still caps the
# sample at 100 entries but no longer raises IndexError when the scraper
# returns fewer than 100 reviews.
# NOTE(review): this rebinds `reviews`, shadowing the scraper function imported
# in the first cell, so the fetch cell cannot be re-run afterwards without
# re-running the imports. Renaming needs a coordinated change in later cells.
reviews = [review['content'] for review in result[:100]]

In [None]:
# Show a small sample of the extracted review texts rather than the full list.
reviews[:10]

In [None]:
# One row per review text; with no column names given, the text ends up in the
# default column labelled 0 (later cells read it as reviews_df[0]).
reviews_df = pd.DataFrame(data=reviews)

In [None]:
# Display the freshly built frame (single column 0 holding the raw review text).
reviews_df

In [None]:
import re
# Matches, in alternation order: an @mention, any single character that is not
# an ASCII letter or digit, or an https URL up to its first path segment.
# Written as a raw string so `\w` is a regex class rather than an invalid
# string escape (which newer Python versions warn about).
_NOISE_PATTERN = re.compile(r"(@[a-zA-Z0-9]+)|([^0-9A-Za-z])|(https://[\w.]+/[\w]+)")


def clean_data(text):
    """Strip mentions, URLs and non-alphanumeric characters from ``text``.

    Every match of the noise pattern is replaced by a space, then runs of
    whitespace are collapsed so the result is single-space separated with no
    leading or trailing blanks.

    Args:
        text: The raw review string.

    Returns:
        The cleaned string containing only ASCII letters, digits and single
        spaces. An empty or all-noise input yields ``""``.
    """
    return ' '.join(_NOISE_PATTERN.sub(" ", text).split())

# Run the regex cleaner over every raw review (column 0) and store the result
# in a new 'cleaned_data' column.
reviews_df['cleaned_data'] = reviews_df[0].map(clean_data)

In [None]:
# Preview the first rows: raw text in column 0 next to the new 'cleaned_data'.
reviews_df.head()

In [None]:
def drop_numbers(list_text):
    """Remove digits from ``list_text``.

    The notebook calls this with a plain string, in which case every digit
    character is deleted and all other characters (including the spaces that
    surrounded the digits) are kept as they are.

    For any other iterable of strings the original behaviour is preserved:
    items containing a digit are dropped and the survivors are concatenated
    with no separator.

    Args:
        list_text: A string, or an iterable of strings.

    Returns:
        A single string with the digit-bearing parts removed.
    """
    if isinstance(list_text, str):
        # One pass over the text instead of one regex search per character.
        # Raw string: '\d' is not a valid escape in a normal string literal.
        return re.sub(r'\d', '', list_text)
    return ''.join(item for item in list_text if not re.search(r'\d', item))

# Strip digits from the already-cleaned text, overwriting the column in place.
reviews_df['cleaned_data'] = reviews_df['cleaned_data'].map(drop_numbers)

In [None]:
# Display the frame after digits have been removed from 'cleaned_data'.
reviews_df

In [None]:
import nltk
# Download the NLTK resources the cleaning cells rely on:
#   - 'punkt' / 'punkt_tab': tokenizer data loaded by word_tokenize (newer NLTK
#     releases look for 'punkt_tab'; older ones for 'punkt').
#   - 'stopwords': backs stopwords.words('english') in the stop-word cell, and
#     was not downloaded anywhere else in this notebook.
for resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(resource)
def lower_case(text):
    """Tokenise ``text`` and return the tokens lower-cased, joined by single spaces.

    Args:
        text: The string to normalise.

    Returns:
        The lower-cased tokens separated by one space each.
    """
    return ' '.join(token.lower() for token in word_tokenize(text))

# Lower-case every cleaned review, overwriting the column in place.
reviews_df['cleaned_data'] = reviews_df['cleaned_data'].map(lower_case)

In [None]:
# Fetch the WordNet corpus — presumably the data WordNetLemmatizer (used in the
# lemmatisation cell) reads from; confirm against the NLTK docs.
nltk.download('wordnet')

In [None]:
# Fetch the 'omw-1.4' resource. NOTE(review): looks like a companion to the
# WordNet download above that some NLTK releases require — confirm it is still needed.
nltk.download('omw-1.4')

In [None]:
# Single lemmatizer instance, created once and reused by lemmatise() for every review.
lemmatizer = WordNetLemmatizer()
def lemmatise(text):
    """Tokenise ``text`` and lemmatise each token with the shared ``lemmatizer``.

    Args:
        text: The string to process.

    Returns:
        The lemmatised tokens joined by single spaces.
    """
    lemmas = map(lemmatizer.lemmatize, word_tokenize(text))
    return ' '.join(lemmas)

# Lemmatise every cleaned review, overwriting the column in place.
reviews_df['cleaned_data'] = reviews_df['cleaned_data'].map(lemmatise)

In [None]:
def remove_stopword(text):
    """Drop English stop words from ``text``.

    Args:
        text: The string to filter; it is tokenised with ``word_tokenize``.

    Returns:
        The remaining tokens joined by single spaces (``""`` if none remain).
    """
    # Build the stop-word set once per call. Previously the corpus was loaded
    # and converted to a set again for every single token in the comprehension.
    english_stopwords = set(stopwords.words('english'))
    kept_tokens = [word for word in word_tokenize(text) if word not in english_stopwords]
    return ' '.join(kept_tokens)

# Remove English stop words from every cleaned review, overwriting the column.
reviews_df['cleaned_data'] = reviews_df['cleaned_data'].map(remove_stopword)

In [None]:
# Preview 'cleaned_data' after lower-casing, lemmatisation and stop-word removal.
reviews_df.head()

In [None]:
def get_polarity(text):
    """Label ``text`` with a sentiment bucket based on its TextBlob polarity.

    Buckets (polarity ``p``):
        p == 0            -> "Neutral"
        0 < p <= 0.3      -> "Weakly Positive"
        0.3 < p <= 0.6    -> "Positive"
        0.6 < p <= 1      -> "Strongly Positive"
        -0.3 < p < 0      -> "Weakly Negative"
        -0.6 < p <= -0.3  -> "Negative"
        -1 <= p <= -0.6   -> "Strongly Negative"

    Args:
        text: The text to score; it is converted with ``str()`` first.

    Returns:
        The bucket label, or ``None`` if the polarity falls outside [-1, 1].
    """
    pol = TextBlob(str(text)).sentiment.polarity
    if pol == 0:
        return "Neutral"
    if 0 < pol <= 0.3:
        return "Weakly Positive"
    if 0.3 < pol <= 0.6:
        return "Positive"
    if 0.6 < pol <= 1:
        return "Strongly Positive"
    if -0.3 < pol < 0:
        return "Weakly Negative"
    if -0.6 < pol <= -0.3:
        return "Negative"
    # Lower bound is inclusive: a polarity of exactly -1.0 used to match no
    # branch at all and silently produced None.
    if -1 <= pol <= -0.6:
        return "Strongly Negative"
    return None
    
# Label every cleaned review with its sentiment bucket in a new 'polarity' column.
reviews_df['polarity'] = reviews_df['cleaned_data'].map(get_polarity)

In [None]:
# Preview the frame with the new 'polarity' label column.
reviews_df.head()

In [None]:
# Count how many reviews fall into each sentiment label.
reviews_df['polarity'].value_counts()

In [None]:
# Per-bucket review counters, plus `polarity` as the running sum of all
# polarity scores (a later cell turns the sum into a mean).
neutral = 0
wpositive = 0
spositive = 0
positive = 0
negative = 0
wnegative = 0
snegative = 0
polarity = 0

# Walk the column itself instead of a hard-coded range(0, 100), so the loop
# follows the actual number of rows and cannot raise on a shorter frame.
for cleaned_text in reviews_df['cleaned_data']:
    pol = TextBlob(str(cleaned_text)).sentiment.polarity
    polarity += pol
    if pol == 0:
        neutral += 1
    elif 0 < pol <= 0.3:
        wpositive += 1
    elif 0.3 < pol <= 0.6:
        positive += 1
    elif 0.6 < pol <= 1:
        spositive += 1
    elif -0.3 < pol < 0:
        wnegative += 1
    elif -0.6 < pol <= -0.3:
        negative += 1
    elif -1 <= pol <= -0.6:
        # Inclusive lower bound: a review scoring exactly -1.0 was previously
        # counted in no bucket, so the counts did not add up to the total.
        snegative += 1

In [None]:
# Sample size taken from the frame rather than a hard-coded 100, so the mean
# and the percentages stay correct if a different number of reviews is loaded.
NoOfTerms = len(reviews_df)
# NOTE(review): `polarity` is rebound from the running sum to the mean here, so
# re-running this cell on its own divides a second time.
polarity = polarity / NoOfTerms
polarity

In [None]:
def percentage(part, whole):
    """Return ``part`` as a percentage of ``whole``, formatted to two decimals.

    Args:
        part: The numerator (anything ``float()`` accepts).
        whole: The denominator (anything ``float()`` accepts).

    Returns:
        A string such as ``'25.00'``; note that it is text, not a number.
    """
    share = 100 * float(part) / float(whole)
    return f"{share:.2f}"

In [None]:
# Replace every raw bucket count with its share of NoOfTerms. percentage()
# returns a formatted string, so these names hold text from here on.
(positive, wpositive, spositive,
 negative, wnegative, snegative, neutral) = [
    percentage(count, NoOfTerms)
    for count in (positive, wpositive, spositive,
                  negative, wnegative, snegative, neutral)
]

In [None]:
# Headline: what was analysed and how many reviews went into it.
print(f"How people are reacting on the load shedding app by analyzing {NoOfTerms} reviews.")
print()
print("-----------------------------------------------------------------------------------------")
print()
print("General Report: ")

# Bucket the mean polarity with the same thresholds used for single reviews.
if polarity == 0:
    print("Neutral")
elif 0 < polarity <= 0.3:
    print("Weakly Positive")
elif 0.3 < polarity <= 0.6:
    print("Positive")
elif 0.6 < polarity <= 1:
    print("Strongly Positive")
elif -0.3 < polarity < 0:
    print("Weakly Negative")
elif -0.6 < polarity <= -0.3:
    print("Negative")
elif -1 <= polarity <= -0.6:
    # Inclusive lower bound: a mean of exactly -1 used to print no label.
    print("Strongly Negative")

print()
print("------------------------------------------------------------------------------------------")
print()
print("Detailed Report: ")
# One line per bucket, in the original order.
for share, description in (
    (positive, "positive"),
    (wpositive, "weakly positive"),
    (spositive, "strongly positive"),
    (negative, "negative"),
    (wnegative, "weakly negative"),
    (snegative, "strongly negative"),
    (neutral, "neutral"),
):
    print(f"{share}% people thought it was {description}")

In [None]:
# Wedge sizes, colours and captions for the pie chart, all in the same order.
sizes = [positive, wpositive, spositive, neutral, negative, wnegative, snegative]
colors = ['yellowgreen', 'lightgreen', 'darkgreen', 'gold', 'red', 'lightsalmon', 'darkred']
bucket_names = ['Positive', 'Weakly Positive', 'Strongly Positive', 'Neutral',
                'Negative', 'Weakly Negative', 'Strongly Negative']
# Each caption is the bucket name followed by its percentage in brackets.
labels = [f'{bucket} [{share}%]' for bucket, share in zip(bucket_names, sizes)]

In [None]:
# Pie chart of the sentiment breakdown, drawn through an explicit figure/axes pair.
fig, ax = plt.subplots()
ax.pie(sizes, labels=labels, colors=colors)
ax.legend(labels, loc="best")
ax.set_title(f'How people are reacting on the loadshedding app by analyzing {NoOfTerms} reviews.')
ax.axis('equal')  # keep the pie circular
fig.tight_layout()
plt.show()

In [None]:
# Summary table: one row per sentiment bucket, one 'Reactions' column holding
# the percentage computed for that bucket.
index = ['Positive', 'Weakly Positive', 'Strongly Positive', 'Neutral',
         'Negative', 'Weakly Negative', 'Strongly Negative']
data = [positive, wpositive, spositive, neutral, negative, wnegative, snegative]
df = pd.DataFrame({'Reactions': data}, index=index)

In [None]:
# Let the notebook render the summary table (rich display) instead of printing
# its plain-text form.
df

In [None]:
# Second view of the same breakdown, drawn from the summary frame.
# The 'Reactions' values are percentages formatted as strings, so convert them
# to floats explicitly rather than relying on implicit coercion while plotting.
plt.pie(x=df['Reactions'].astype(float), labels=index)
plt.title('Share of reviews per sentiment category')
plt.axis('equal')  # keep the pie circular
plt.tight_layout()
plt.show()