# 1. Library Installation

In [6]:
!pip install -q pandas
!pip install -q vaderSentiment

# 2. Import Dataset

In [36]:
import pandas as pd

# Location of the dataset CSV, given relative to the notebook's working directory.
file_path = "../dataset/dataset.csv"

# Load the whole file into a DataFrame with pandas' default parsing options.
df = pd.read_csv(filepath_or_buffer=file_path)

In [37]:
# Display the DataFrame as the cell output to check what was loaded.
df

Unnamed: 0,app,content,score,sentiment_score,sentiment
0,Disaster Alert,Working as a Public Health Nurse I get to resp...,5,0.1007,positive
1,Disaster Alert,Nice to have before traveling to unknown terri...,5,0.4215,positive
2,Disaster Alert,I like! I'm trying to find anything about tsun...,5,0.4738,positive
3,Disaster Alert,good to have but what options are expected in ...,5,0.2382,positive
4,Disaster Alert,Shows hazards all right but refuses to send no...,2,-0.5588,negative
...,...,...,...,...,...
27641,Earthquake,It works quite well even anticipates some othe...,5,0.7264,positive
27642,Earthquake,This application is very good.,5,0.4927,positive
27643,Earthquake,Data from earthquakes in Chile in the last 24 ...,2,0.0000,positive
27644,Earthquake,This is as good as earthquake apps can go. Thi...,5,0.8481,positive


# 3. Sentiment Analysis

In [17]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import time

# Score every entry of df['content'] with VADER and collect only the value
# that polarity_scores() returns under the 'compound' key, in row order.
analyzer = SentimentIntensityAnalyzer()
sentiment_score_list = []

# Number of processed sentences between two progress lines.
# The original comment and the recorded output both say 500, so the
# modulus below uses this constant instead of the stray literal 1000.
PROGRESS_INTERVAL = 500

# perf_counter() is monotonic, so the elapsed times cannot be skewed by
# system clock adjustments while the loop runs.
start_time = time.perf_counter()
for i, sentence in enumerate(df['content'], 1):
    vs = analyzer.polarity_scores(sentence)
    sentiment_score_list.append(vs['compound'])

    # Print progress every PROGRESS_INTERVAL sentences
    if i % PROGRESS_INTERVAL == 0:
        elapsed_time = time.perf_counter() - start_time
        print(f"Processed {i} sentences in {elapsed_time:.2f} seconds.")

# Total time taken
total_time = time.perf_counter() - start_time
print(f"Total time taken: {total_time:.2f} seconds.")

Processed 500 sentences in 0.09 seconds.
Processed 1000 sentences in 0.14 seconds.
Processed 1500 sentences in 0.20 seconds.
Processed 2000 sentences in 0.26 seconds.
Processed 2500 sentences in 0.30 seconds.
Processed 3000 sentences in 0.35 seconds.
Processed 3500 sentences in 0.40 seconds.
Processed 4000 sentences in 0.45 seconds.
Processed 4500 sentences in 0.50 seconds.
Processed 5000 sentences in 0.56 seconds.
Processed 5500 sentences in 0.62 seconds.
Processed 6000 sentences in 0.66 seconds.
Processed 6500 sentences in 0.77 seconds.
Processed 7000 sentences in 0.87 seconds.
Processed 7500 sentences in 0.93 seconds.
Processed 8000 sentences in 0.98 seconds.
Processed 8500 sentences in 1.02 seconds.
Processed 9000 sentences in 1.07 seconds.
Processed 9500 sentences in 1.12 seconds.
Processed 10000 sentences in 1.18 seconds.
Processed 10500 sentences in 1.24 seconds.
Processed 11000 sentences in 1.30 seconds.
Processed 11500 sentences in 1.38 seconds.
Processed 12000 sentences in 1.

In [21]:
# Store the collected scores as a new column, one value per row.
df['sentiment_score'] = sentiment_score_list

# Derive a two-class label from the score. The comparison is inclusive,
# so a score of exactly 0 is labelled 'positive'.
df['sentiment'] = (
    df['sentiment_score']
    .ge(0)
    .map({True: 'positive', False: 'negative'})
)

In [22]:
# Display the DataFrame again to inspect the sentiment_score and sentiment columns.
df

Unnamed: 0,app,content,score,sentiment_score,sentiment
0,Disaster Alert,Working as a Public Health Nurse I get to resp...,5,0.1007,positive
1,Disaster Alert,Nice to have before traveling to unknown terri...,5,0.4215,positive
2,Disaster Alert,I like! I'm trying to find anything about tsun...,5,0.4738,positive
3,Disaster Alert,good to have but what options are expected in ...,5,0.2382,positive
4,Disaster Alert,Shows hazards all right but refuses to send no...,2,-0.5588,negative
...,...,...,...,...,...
27643,Earthquake,It works quite well even anticipates some othe...,5,0.7264,positive
27644,Earthquake,This application is very good.,5,0.4927,positive
27645,Earthquake,Data from earthquakes in Chile in the last 24 ...,2,0.0000,positive
27646,Earthquake,This is as good as earthquake apps can go. Thi...,5,0.8481,positive


In [23]:
# Group rows by (score, sentiment); count() reports the number of non-null
# entries in each remaining column for every group.
df.groupby(by=['score', 'sentiment']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,app,content,sentiment_score
score,sentiment,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,positive,2,2,2
1,negative,2843,2843,2843
1,positive,2808,2808,2808
2,negative,972,972,972
2,positive,1278,1278,1278
3,negative,770,770,770
3,positive,1683,1683,1683
4,negative,485,485,485
4,positive,3259,3259,3259
5,negative,797,797,797


In [24]:
# Inspect the rows whose score column equals 0.
df.loc[df['score'] == 0]

Unnamed: 0,app,content,score,sentiment_score,sentiment
6009,Earthquake Alert!,Love app very informative. Is there a way to s...,0,0.3612,positive
6021,Earthquake Alert!,I live in Indiana daughter is in San Francisco...,0,0.0,positive


In [26]:
# Rows where the score is 1 yet the derived label is 'positive',
# i.e. cases where score and label disagree.
df.loc[(df['score'] == 1) & (df['sentiment'] == 'positive')]

Unnamed: 0,app,content,score,sentiment_score,sentiment
7,Disaster Alert,Needs dark mode...unable to properly evaluate.,1,0.0000,positive
24,Disaster Alert,You know I wanted to put your app back on my p...,1,0.5927,positive
36,Disaster Alert,Can't open app all the time.,1,0.0000,positive
38,Disaster Alert,Disaster is a very strong and very descriptive...,1,0.3832,positive
40,Disaster Alert,7-6-2022 You have not posted one not one earth...,1,0.0000,positive
...,...,...,...,...,...
27596,Earthquake,It doesn't let you do anything or send seismic...,1,0.0000,positive
27604,Earthquake,The app takes a long time to warn you about th...,1,0.3612,positive
27606,Earthquake,There was a tremor in my city Cochabamba Boliv...,1,0.0000,positive
27618,Earthquake,Unable to focus on one state or zip code!,1,0.0000,positive


In [30]:
# Full text of the second row (position 1) among those with score 1 and a 'positive' label.
df.loc[(df['score'] == 1) & (df['sentiment'] == 'positive'), 'content'].iloc[1]

'You know I wanted to put your app back on my phone but I decided not to after taking a look at it. Because you people chopped it up and changed the whole app. It was much better years ago.'