Imports and setups

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os

sys.path.append(os.path.join(os.getcwd(), '..'))
sys.path.append(os.path.join(os.getcwd(), '../scripts'))
from scripts.sentiment_analysis import SentimentAnalyzer
from scripts.config import DATA_PATHS

Load Data

In [2]:
# Initialize analyzer
analyzer = SentimentAnalyzer()

# Use your data loader
df = analyzer.load_data()

# Check if data loaded successfully
if df is None:
    print("‚ùå Cannot continue - no data loaded")
else:
    print(f"‚úÖ Loaded {len(df)} reviews")
    print("üìä Banks:", df['bank_name'].unique())
    analyzer.df = df  # Important: assign to analyzer

‚úÖ Successfully loaded 1200 reviews
‚úÖ Loaded 1200 reviews
üìä Banks: ['Commercial bank of Ethiopia' 'Bank of Abyssinia' 'Dashen Bank']


Initialize Modules and Run Analysis

In [3]:
# Only run if data loaded successfully
if df is not None:
    analyzer.initialize_sentiment_model()
    df_with_sentiment = analyzer.analyze_real_data()
    
    print("‚úÖ Sentiment analysis complete!")
    print("üìà Sentiment distribution:")
    print(df_with_sentiment['sentiment_label'].value_counts())
else:
    df_with_sentiment = None
    print("‚ùå Skipping analysis - no data available")

‚ö†Ô∏è Using TextBlob for sentiment analysis (faster setup)
Applying sentiment analysis to real data...
‚úÖ Sentiment analysis complete!
‚úÖ Sentiment analysis complete!
üìà Sentiment distribution:
sentiment_label
POSITIVE    632
NEUTRAL     459
NEGATIVE    109
Name: count, dtype: int64


In [4]:
# Make plots a bit prettier
sns.set(style="whitegrid")
plt.rcParams["figure.figsize"] = (8, 4)

Text Processing

In [5]:
if df_with_sentiment is not None:
    def rating_to_label(r):
        if r <= 2: return "negative"
        elif r == 3: return "neutral"
        else: return "positive"

    df_with_sentiment["rating_label"] = df_with_sentiment["rating"].apply(rating_to_label)
    df_with_sentiment["clean_text"] = df_with_sentiment["review_text"].str.lower()
    
    print("‚úÖ Text preprocessing complete")
    display(df_with_sentiment[["review_text", "rating", "rating_label"]].head(3))
else:
    print("‚ùå Skipping preprocessing - no data")

‚úÖ Text preprocessing complete


Unnamed: 0,review_text,rating,rating_label
0,CBE ·ã≠·àà·ã´·àç·ç¢,5,positive
1,it's special for me,5,positive
2,Make it user friendly.,2,negative


Frequency Analysis

In [6]:
if df_with_sentiment is not None:
    try:
        freq_df = analyzer.frequency_test()
        # ... rest of your frequency analysis code
        print("‚úÖ Frequency analysis complete")
    except Exception as e:
        print("‚ö†Ô∏è Frequency analysis skipped:", e)
else:
    print("‚ùå Skipping frequency analysis - no data")

‚úÖ Frequency analysis complete


Aggregation and Themes

In [7]:
if df_with_sentiment is not None:
    # ‚úÖ TASK 2: Sentiment aggregation
    print("=== TASK 2: SENTIMENT BY BANK & RATING ===")
    bank_rating_sentiment = df_with_sentiment.groupby(['bank_name', 'rating']).agg({
        'sentiment_score': 'mean',
        'review_text': 'count'
    }).rename(columns={'review_text': 'review_count'}).reset_index()
    print(bank_rating_sentiment)

    # ‚úÖ TASK 2: Thematic analysis
    print("\n=== TASK 2: TOP KEYWORDS BY BANK ===")
    for bank in df_with_sentiment['bank_name'].unique():
        bank_reviews = df_with_sentiment[df_with_sentiment['bank_name'] == bank]
        print(f"\n--- {bank} ---")
        
        from sklearn.feature_extraction.text import TfidfVectorizer
        tfidf = TfidfVectorizer(stop_words='english', max_features=8)
        tfidf_matrix = tfidf.fit_transform(bank_reviews['clean_text'])
        feature_names = tfidf.get_feature_names_out()
        print("Keywords:", list(feature_names))
else:
    print("‚ùå Cannot complete Task 2 - no data available")

=== TASK 2: SENTIMENT BY BANK & RATING ===
                      bank_name  rating  sentiment_score  review_count
0             Bank of Abyssinia       1        -0.099326           134
1             Bank of Abyssinia       2         0.096214            15
2             Bank of Abyssinia       3         0.119560            22
3             Bank of Abyssinia       4         0.292164            23
4             Bank of Abyssinia       5         0.429618           206
5   Commercial bank of Ethiopia       1        -0.042707            56
6   Commercial bank of Ethiopia       2         0.099280            13
7   Commercial bank of Ethiopia       3         0.144211            23
8   Commercial bank of Ethiopia       4         0.241601            40
9   Commercial bank of Ethiopia       5         0.433438           268
10                  Dashen Bank       1        -0.124915            73
11                  Dashen Bank       2         0.119978            18
12                  Dashen Bank   

Save Results

In [8]:
if df_with_sentiment is not None:
    output_file = os.path.join(DATA_PATHS['processed'], "bank_reviews_with_sentiment.csv")
    df.to_csv(output_file, index=False, encoding='utf-8')
    print(f"[SAVED] reviews sentiment ‚Üí {output_file}")
else:
    print("‚ùå Could not save results - no data available")

OSError: Cannot save file into a non-existent directory: 'data\processed'