In [5]:
# Load the cleaned data

import pandas as pd

# Read the cleaned reviews, parsing `date` as datetime, and drop every row that
# has a missing value. Chaining .dropna() (rather than inplace=True) builds
# `reviews` in a single assignment, so the name never points at a half-cleaned frame.
reviews = pd.read_csv('../data/cleaned_reviews.csv', parse_dates=["date"]).dropna()

In [6]:
# Dimensions as (rows, columns). Printed explicitly: a bare expression is only
# echoed by the notebook when it is the last statement of the cell.
print(reviews.shape)

# First few rows
print(reviews.head())

# Data types
print(reviews.dtypes)

# Info: DataFrame.info() writes its report itself and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
reviews.info()

# Describe summary statistics
print(reviews.describe())

# Check for missing values
print(reviews.isna().sum())

                                              review  rating bank  \
0                         So bad now and hard to use       5  CBE   
1  it is so amazing app. but, it is better to upd...       5  CBE   
2                                         v.good app       4  CBE   
3                                      very good app       1  CBE   
4           Very amazing app indeed. I'm enjoying it       5  CBE   

        source       date                                   processed_review  
0  Google Play 2025-06-09                                       bad hard use  
1  Google Play 2025-06-09  amazing app better update access without inter...  
2  Google Play 2025-06-09                                                app  
3  Google Play 2025-06-09                                           good app  
4  Google Play 2025-06-08                        amazing app indeed enjoying  
review                      object
rating                       int64
bank                        object
source

In [7]:
# Sentiment analysis using Vader

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Initialize VADER: build one analyzer instance up front; get_vader_sentiment
# reads it as a module-level global for every review it scores.
analyzer = SentimentIntensityAnalyzer()

# Turn a review's VADER compound score into a coarse sentiment label
def get_vader_sentiment(text: str, threshold: float = 0.05) -> str:
    """Label ``text`` as 'positive', 'negative' or 'neutral' using VADER.

    The text is scored with the module-level ``analyzer`` and the resulting
    ``compound`` score is bucketed around zero.

    Args:
        text: Review text to score.
        threshold: Half-width of the neutral band. A compound score at or
            above ``threshold`` is 'positive', at or below ``-threshold`` is
            'negative', anything in between is 'neutral'. The default of 0.05
            reproduces the cut-offs previously hard-coded here.

    Returns:
        One of 'positive', 'negative' or 'neutral'.

    Raises:
        ValueError: If ``threshold`` is negative (the bands would overlap).
    """
    if threshold < 0:
        raise ValueError("threshold must be non-negative")

    compound = analyzer.polarity_scores(text)['compound']
    if compound >= threshold:
        return 'positive'
    if compound <= -threshold:
        return 'negative'
    return 'neutral'

# Score every review with VADER and keep the label in a new column.
# NOTE(review): the input is `processed_review`. The sample rows printed earlier
# show that cleaning removed words such as "so", "very" and "but", which VADER's
# heuristics are reported to take into account. Confirm with the author whether
# the raw `review` text should be scored instead.
reviews['vader_sentiment'] = reviews['processed_review'].map(get_vader_sentiment)

# Per-bank counts of each VADER label
print(
    reviews
    .groupby('bank')['vader_sentiment']
    .value_counts()
)

bank    vader_sentiment
BOA     positive           156
        neutral            128
        negative           114
CBE     positive           243
        neutral            106
        negative            37
Dashen  positive           285
        neutral             76
        negative            19
Name: count, dtype: int64


In [8]:
# Sentiment analysis using TextBlob

from textblob import TextBlob

def get_sentiment(text: str, threshold: float = 0.0) -> str:
    """Label ``text`` as 'positive', 'negative' or 'neutral' using TextBlob.

    Args:
        text: Review text to score.
        threshold: Half-width of the neutral band around zero. Polarity
            strictly greater than ``threshold`` is 'positive', strictly less
            than ``-threshold`` is 'negative', everything else is 'neutral'.
            The default of 0.0 keeps the original rule, where only a polarity
            of exactly zero counts as neutral.

    Returns:
        One of 'positive', 'negative' or 'neutral'.

    Raises:
        ValueError: If ``threshold`` is negative (the bands would overlap).
    """
    if threshold < 0:
        raise ValueError("threshold must be non-negative")

    # Read the polarity once and reuse it for both comparisons
    polarity = TextBlob(text).sentiment.polarity
    if polarity > threshold:
        return 'positive'
    if polarity < -threshold:
        return 'negative'
    return 'neutral'

# Label every review with TextBlob and keep the result in a new column.
# NOTE(review): this scores the stopword-stripped `processed_review`; presumably
# TextBlob's default analyzer also reacts to intensifiers and negations that the
# cleaning step may have removed. Confirm whether the raw `review` text is the
# better input.
reviews['textblob_sentiment'] = reviews['processed_review'].map(get_sentiment)

# Per-bank counts of each TextBlob label
print(
    reviews
    .groupby('bank')['textblob_sentiment']
    .value_counts()
)

bank    textblob_sentiment
BOA     positive              159
        neutral               136
        negative              103
CBE     positive              238
        neutral               120
        negative               28
Dashen  positive              281
        neutral                72
        negative               27
Name: count, dtype: int64
