In [3]:
# %pip install transformers torch  # transformers needs a backend (PyTorch or TensorFlow) to run pipelines

In [5]:
import pandas as pd
from transformers import pipeline
from tqdm import tqdm
import numpy as np

def load_model():
    """Build the sentiment-analysis pipeline used by this script.

    Returns:
        The object produced by ``transformers.pipeline`` for the
        ``"sentiment-analysis"`` task, configured with the
        ``distilbert-base-uncased-finetuned-sst-2-english`` checkpoint and
        ``truncation=True``.
    """
    checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
    sentiment_pipe = pipeline(
        "sentiment-analysis", model=checkpoint, truncation=True
    )
    return sentiment_pipe

def analyze_sentiment(df, model):
    """Add sentiment analysis columns to DataFrame.

    Every value in ``df['review']`` is scored individually by ``model`` and
    the outcome is attached to the matching row.

    Args:
        df: DataFrame containing a ``review`` column.
        model: Callable that accepts a string and returns a sequence whose
            first element is a mapping with ``'label'`` and ``'score'`` keys.

    Returns:
        A new DataFrame with the columns of ``df`` plus ``sentiment_label``
        and ``sentiment_score``. Rows that could not be scored get the label
        ``'ERROR'`` and a score of ``0.0``. ``df`` itself is not modified.
    """
    sentiments = []
    for text in tqdm(df['review'], desc="Processing reviews"):
        try:
            # Cap the input at 512 characters (characters, not tokens).
            result = model(text[:512])[0]
            sentiments.append({
                'sentiment_label': result['label'],
                'sentiment_score': result['score']
            })
        except Exception:
            # Best-effort: an unscorable review (e.g. a NaN cell, which cannot
            # be sliced) is marked instead of aborting the whole run.
            # ``Exception`` rather than a bare ``except`` so Ctrl-C and
            # SystemExit still propagate.
            sentiments.append({
                'sentiment_label': 'ERROR',
                'sentiment_score': 0.0
            })

    # concat(axis=1) aligns on index labels, so the results must carry df's
    # own index; a fresh 0..n-1 index would misalign any filtered/re-indexed
    # input. Explicit columns keep the schema stable for an empty df.
    sentiment_df = pd.DataFrame(
        sentiments,
        columns=['sentiment_label', 'sentiment_score'],
        index=df.index,
    )
    return pd.concat([df, sentiment_df], axis=1)

def aggregate_sentiment(df):
    """Summarise sentiment per (bank, rating) group.

    For each combination of ``bank`` and ``rating`` the result holds the
    mean and the count of ``sentiment_score`` and a dict mapping each
    ``sentiment_label`` to its number of occurrences. The grouping keys are
    returned as ordinary columns.
    """
    by_bank_and_rating = df.groupby(['bank', 'rating'])
    summary = by_bank_and_rating.agg({
        'sentiment_score': ['mean', 'count'],
        # Kept as a lambda: the function's name ends up in the column label.
        'sentiment_label': lambda labels: labels.value_counts().to_dict(),
    })
    return summary.reset_index()

if __name__ == "__main__":
    # Where the cleaned reviews are read from and the scored rows written to.
    input_path = 'clean_reviews.csv'
    output_path = '../data/analyzed_results.csv'

    # Read the reviews and build the sentiment pipeline.
    df = pd.read_csv(input_path)
    sentiment_pipeline = load_model()

    # Score every review and persist the enriched table.
    analyzed_df = analyze_sentiment(df, sentiment_pipeline)
    analyzed_df.to_csv(output_path, index=False)

    # Per-(bank, rating) summary, printed as a markdown table.
    agg_df = aggregate_sentiment(analyzed_df)
    print("\nSentiment by Bank and Rating:", agg_df.to_markdown(), sep="\n")

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


RuntimeError: At least one of TensorFlow 2.0 or PyTorch should be installed. To install TensorFlow 2.0, read the instructions at https://www.tensorflow.org/install/ To install PyTorch, read the instructions at https://pytorch.org/.