In [1]:
import sys
import os
import pandas as pd
from nltk.sentiment import SentimentIntensityAnalyzer
import nltk

# Make sure the VADER lexicon is available locally. Only fall back to
# nltk.download() when the resource is actually missing, so repeated runs
# neither contact the NLTK server nor print download log lines.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon')

# Load CSV
# NOTE: the project root is taken to be the parent of the current working
# directory, so this cell must be run from a direct subfolder of the project.
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
csv_path = os.path.join(project_root, "data", "processed", "reviews_processed.csv")
df = pd.read_csv(csv_path)

# Initialize VADER
sia = SentimentIntensityAnalyzer()

# Compute the VADER compound score for every review.
# str() coerces non-string cells (e.g. NaN from empty CSV fields) so that
# polarity_scores() always receives text.
df['sentiment_score_vader'] = df['review_text'].apply(
    lambda x: sia.polarity_scores(str(x))['compound']
)

# Assign sentiment labels
def label_sentiment(score, pos_threshold=0.05, neg_threshold=-0.05):
    """Map a compound sentiment score to a categorical label.

    Args:
        score: Numeric compound polarity score to classify.
        pos_threshold: Scores greater than or equal to this value are
            labelled 'positive'. Defaults to 0.05.
        neg_threshold: Scores less than or equal to this value are
            labelled 'negative'. Defaults to -0.05.

    Returns:
        'positive', 'negative', or 'neutral'. Scores strictly between the
        two thresholds are 'neutral'; so is NaN, because every comparison
        against NaN is false.
    """
    if score >= pos_threshold:
        return 'positive'
    if score <= neg_threshold:
        return 'negative'
    return 'neutral'

# Turn every compound score into a positive / negative / neutral label
df['sentiment_label_vader'] = df['sentiment_score_vader'].map(label_sentiment)

# Summarise per (bank_name, rating) pair: the average compound score and
# the number of non-null scores in the group
agg_df = (
    df.groupby(['bank_name', 'rating'])['sentiment_score_vader']
    .agg(mean_sentiment='mean', count='count')
    .reset_index()
)

# Preview the first rows of the per-review VADER output
print("Sample sentiment-labeled reviews:")
print(df.head()[['bank_name', 'sentiment_score_vader', 'sentiment_label_vader']])

# Show the per-bank, per-rating summary table
print("\nAggregated sentiment by bank and rating:")
print(agg_df)


# Write the full DataFrame (without the index column) into the same
# data/processed folder the input CSV was read from
output_path = os.path.join(project_root, "data", "processed", "reviews_with_vader.csv")
df.to_csv(path_or_buf=output_path, index=False)
print(f"VADER sentiment results saved to: {output_path}")

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\derej\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Sample sentiment-labeled reviews:
           bank_name  sentiment_score_vader sentiment_label_vader
0  Bank of Abyssinia                -0.7973              negative
1  Bank of Abyssinia                -0.4268              negative
2  Bank of Abyssinia                 0.0000               neutral
3  Bank of Abyssinia                -0.4019              negative
4  Bank of Abyssinia                -0.1531              negative

Aggregated sentiment by bank and rating:
                      bank_name  rating  mean_sentiment  count
0             Bank of Abyssinia       1       -0.204237    337
1             Bank of Abyssinia       2       -0.042334     38
2             Bank of Abyssinia       3        0.076490     40
3             Bank of Abyssinia       4        0.454253     19
4             Bank of Abyssinia       5        0.452120    131
5   Commercial Bank of Ethiopia       1       -0.093957    181
6   Commercial Bank of Ethiopia       2       -0.003328     54
7   Commercial Bank of E