# Title & Setup

In [1]:
import warnings

# Suppress every Python warning for the remainder of the session so the demo
# output stays uncluttered.
warnings.filterwarnings('ignore')

# Title banner: each list entry becomes one printed line (entries that start
# with "\n" additionally emit a blank line before themselves).
print("\n".join([
    "="*80,
    " "*20 + "üéØ ASPECT-BASED SENTIMENT ANALYSIS",
    " "*25 + "Financial News Demo",
    "="*80,
    "\nüìä Model: RoBERTa-base Fine-tuned on SEntFiN Dataset",
    "üéØ Task: Entity-specific Sentiment Classification",
    "üìà Performance: 97.55% Accuracy | F1-Score: 0.9757",
    "\n" + "="*80,
]))

                    🎯 ASPECT-BASED SENTIMENT ANALYSIS
                         Financial News Demo

📊 Model: RoBERTa-base Fine-tuned on SEntFiN Dataset
🎯 Task: Entity-specific Sentiment Classification
📈 Performance: 97.55% Accuracy | F1-Score: 0.9757



# Load Model & Dependencies

In [1]:
print("üîÑ Loading model and dependencies...\n")

import torch
import pickle
import io
from transformers import RobertaTokenizer, RobertaModel
import torch.nn as nn
import pandas as pd
from IPython.display import display, HTML

# ===== Custom CPU Unpickler =====
class CPUUnpickler(pickle.Unpickler):
    """Unpickler that redirects pickled torch storages to the CPU.

    Whenever the pickle stream asks for ``torch.storage._load_from_bytes``,
    a replacement loader is handed back that calls ``torch.load`` with
    ``map_location='cpu'``. Every other global is resolved by the stock
    ``pickle.Unpickler`` logic.

    NOTE: this is still ordinary unpickling, which can execute code embedded
    in the stream. Only use it on checkpoint files from a trusted source.
    """

    def find_class(self, module, name):
        """Resolve the global ``module.name`` referenced by the pickle stream.

        Args:
            module (str): Dotted module path recorded in the pickle.
            name (str): Attribute name inside that module.

        Returns:
            A one-argument CPU-mapping loader for
            ``torch.storage._load_from_bytes``; otherwise whatever the parent
            class resolves.
        """
        # Guard clause: anything but the storage loader takes the default path.
        if (module, name) != ('torch.storage', '_load_from_bytes'):
            return super().find_class(module, name)

        return lambda raw_bytes: torch.load(io.BytesIO(raw_bytes), map_location='cpu')

# ===== Model Definition =====
class RobertaForABSA(nn.Module):
    """RoBERTa encoder followed by dropout and a linear classification head.

    Args:
        num_labels (int): Number of output classes (default 3).
        dropout_rate (float): Dropout probability applied to the encoder's
            ``pooler_output`` before classification (default 0.3).
    """

    def __init__(self, num_labels=3, dropout_rate=0.3):
        super().__init__()
        # Attribute names double as state_dict key prefixes, so they are fixed.
        self.roberta = RobertaModel.from_pretrained('roberta-base')
        self.dropout = nn.Dropout(dropout_rate)
        hidden_size = self.roberta.config.hidden_size
        self.classifier = nn.Linear(hidden_size, num_labels)

    def forward(self, input_ids, attention_mask):
        """Return raw (un-normalised) class scores for the given batch.

        Args:
            input_ids: Token ids forwarded to the encoder.
            attention_mask: Attention mask forwarded to the encoder.

        Returns:
            The classifier output computed from the dropped-out
            ``pooler_output`` of the encoder.
        """
        encoder_out = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        return self.classifier(self.dropout(encoder_out.pooler_output))

# ===== Load Model =====
print("   [1/3] Loading model weights...")
device = torch.device('cpu')

# SECURITY NOTE: unpickling can execute code embedded in the file, so
# 'roberta_absa_model.pkl' must come from a source you trust.
try:
    # CPUUnpickler remaps pickled torch storages to the CPU while loading.
    with open('roberta_absa_model.pkl', 'rb') as f:
        checkpoint = CPUUnpickler(f).load()
    print("   ‚úì Model file loaded with CPU unpickler")
except Exception as e:
    # Show a readable message in the notebook, then fail loudly.
    print(f"   ‚úó Error loading model: {e}")
    raise

# The pickle may hold a training checkpoint dict, a bare state_dict, or a
# whole nn.Module; anything else is rejected.
if isinstance(checkpoint, dict):
    # Build the architecture only now that weights will be loaded into it:
    # constructing RobertaForABSA loads the pretrained roberta-base encoder,
    # which is wasted work for the other checkpoint kinds.
    model = RobertaForABSA()

    if 'model_state_dict' in checkpoint:
        state_dict = checkpoint['model_state_dict']
        print("   ‚úì Found model_state_dict in checkpoint")
    else:
        state_dict = checkpoint
        print("   ‚úì Checkpoint is direct state_dict")

    # Force all tensors to CPU; non-tensor entries pass through unchanged.
    cpu_state_dict = {
        key: value.cpu() if isinstance(value, torch.Tensor) else value
        for key, value in state_dict.items()
    }

    model.load_state_dict(cpu_state_dict)
    print("   ‚úì Model weights loaded")

elif isinstance(checkpoint, nn.Module):
    # Whole model object was pickled; use it directly.
    model = checkpoint.cpu()
    print("   ‚úì Whole model loaded")
else:
    print(f"   ‚úó Unexpected checkpoint type: {type(checkpoint)}")
    raise ValueError("Unexpected checkpoint format")

model = model.to(device)
model.eval()  # inference mode: disables dropout

print("   [2/3] Loading tokenizer...")
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

print("   [3/3] Initializing label mappings...")
id2label = {0: 'negative', 1: 'neutral', 2: 'positive'}
# Derive the inverse mapping so the two dictionaries can never disagree.
label2id = {label: idx for idx, label in id2label.items()}

print("\n‚úÖ Model loaded successfully!")
print(f"   Device: {device}")
print("   Parameters: ~125M")
print(f"   Labels: {list(label2id.keys())}")
print("   Model Status: Ready for inference")

🔄 Loading model and dependencies...

   [1/3] Loading model weights...


  return lambda b: torch.load(io.BytesIO(b), map_location='cpu')


   ✓ Model file loaded with CPU unpickler


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


   ✓ Found model_state_dict in checkpoint
   ✓ Model weights loaded
   [2/3] Loading tokenizer...
   [3/3] Initializing label mappings...

✅ Model loaded successfully!
   Device: cpu
   Parameters: ~125M
   Labels: ['negative', 'neutral', 'positive']
   Model Status: Ready for inference


# Prediction Function

In [2]:
def predict_sentiment(entity, headline, verbose=True, max_length=40):
    """
    Predict the sentiment expressed towards one entity in a headline.

    The entity and the headline are tokenized together as a text pair,
    padded/truncated to ``max_length`` tokens, and scored by the notebook's
    global ``model``. Class indices are translated through the global
    ``id2label`` mapping.

    Args:
        entity (str): Entity to analyze
        headline (str): News headline
        verbose (bool): If True, render the result with display_prediction
        max_length (int): Token budget for the encoded (entity, headline)
            pair; longer inputs are truncated. Defaults to 40, the value
            this notebook previously hard-coded.

    Returns:
        dict: Keys 'entity', 'headline', 'sentiment' (label string),
        'confidence' (probability of the predicted label) and
        'probabilities' (label -> probability for every entry of id2label).
    """
    # Tokenize entity and headline as a pair
    encoding = tokenizer(
        entity,
        headline,
        padding='max_length',
        truncation=True,
        max_length=max_length,
        return_tensors='pt'
    )

    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)

    # Inference only, so gradient tracking is switched off
    with torch.no_grad():
        logits = model(input_ids, attention_mask)
        probabilities = torch.softmax(logits, dim=1)
        predicted_class = torch.argmax(probabilities, dim=1).item()

    # One host transfer for the single example instead of one .item() per class
    class_probs = probabilities[0].tolist()

    # Build the distribution from id2label so labels and indices cannot drift
    all_probs = {id2label[idx]: prob for idx, prob in enumerate(class_probs)}

    result = {
        'entity': entity,
        'headline': headline,
        'sentiment': id2label[predicted_class],
        'confidence': class_probs[predicted_class],
        'probabilities': all_probs
    }

    if verbose:
        display_prediction(result)

    return result

def display_prediction(result):
    """Render one prediction as a colour-coded HTML card in the notebook.

    Args:
        result (dict): Must provide 'entity', 'headline', 'sentiment'
            ('positive', 'negative' or 'neutral'), 'confidence' (float) and
            'probabilities' (dict with 'negative', 'neutral' and 'positive'
            float entries).

    Raises:
        KeyError: If a required key is missing or the sentiment is not one
            of the three known labels.
    """
    # Local import keeps this function self-contained.
    from html import escape

    sentiment = result['sentiment']
    confidence = result['confidence']
    probs = result['probabilities']

    # Entity and headline are free text (e.g. "L&T", "<...>"); escape them so
    # they are shown literally instead of being parsed as markup.
    entity_text = escape(str(result['entity']))
    headline_text = escape(str(result['headline']))

    # Background colour per sentiment
    colors = {
        'positive': '#d4edda',
        'negative': '#f8d7da',
        'neutral': '#fff3cd'
    }

    icons = {
        'positive': 'üü¢',
        'negative': 'üî¥',
        'neutral': '‚ö™'
    }

    markup = f"""
    <div style="border: 2px solid #ddd; border-radius: 10px; padding: 20px; margin: 10px 0; background-color: {colors[sentiment]};">
        <h3 style="margin: 0 0 10px 0;">{icons[sentiment]} Sentiment: <strong>{sentiment.upper()}</strong></h3>
        <p style="margin: 5px 0;"><strong>Entity:</strong> {entity_text}</p>
        <p style="margin: 5px 0;"><strong>Headline:</strong> {headline_text}</p>
        <p style="margin: 5px 0;"><strong>Confidence:</strong> {confidence:.1%}</p>
        <hr style="margin: 15px 0;">
        <p style="margin: 5px 0;"><strong>Probability Distribution:</strong></p>
        <ul style="margin: 5px 0;">
            <li>üî¥ Negative: {probs['negative']:.1%}</li>
            <li>‚ö™ Neutral: {probs['neutral']:.1%}</li>
            <li>üü¢ Positive: {probs['positive']:.1%}</li>
        </ul>
    </div>
    """

    display(HTML(markup))

# Confirm the helpers are defined and show a sample call.
print(
    "‚úÖ Prediction functions ready!",
    "\nUsage:",
    "   predict_sentiment(entity='Gold', headline='Gold shines on demand')",
    sep="\n",
)

✅ Prediction functions ready!

Usage:
   predict_sentiment(entity='Gold', headline='Gold shines on demand')


# Demo 1 - Basic Prediction

In [3]:
# Section banner, scenario description and the heading of the first example.
print(
    "="*80,
    "üìå DEMO 1: Basic Sentiment Prediction",
    "="*80,
    "\nScenario: Analyzing sentiment untuk single entity\n",
    "Example 1:",
    "-" * 60,
    sep="\n",
)

# Example 1: verbose defaults to True, so the result card is rendered inline.
result1 = predict_sentiment("Gold", "Gold shines on seasonal demand")

📌 DEMO 1: Basic Sentiment Prediction

Scenario: Analyzing sentiment untuk single entity

Example 1:
------------------------------------------------------------


# Demo 1 Continued

In [4]:
# Example 2: each call renders its own result card because verbose is left on.
print("\nExample 2:", "-" * 60, sep="\n")
result2 = predict_sentiment(
    "Infosys",
    "Infosys announces 10% salary hike for all employees",
)

# Example 3
print("\nExample 3:", "-" * 60, sep="\n")
result3 = predict_sentiment(
    "MMTC",
    "MMTC Q2 net loss at Rs 10.4 crore",
)


Example 2:
------------------------------------------------------------



Example 3:
------------------------------------------------------------


# Demo 2 - Opposing Sentiments

In [5]:
print("\n" + "="*80)
print("üåü DEMO 2: Opposing Sentiments in Same Headline (ABSA Power!)")
print("="*80)
print("\nScenario: Multiple entities dengan sentimen BERBEDA dalam satu headline")
print("\nüí° This is what makes ABSA special! Traditional sentiment analysis fails here.\n")

# One headline, two entities: the same text is scored once per entity.
headline_opposing = "Gold shines on seasonal demand; Silver dull"

print(f"üì∞ Headline: \"{headline_opposing}\"")
print("üéØ Entities: Gold, Silver")
print("\n" + "-"*80)

print("\n1Ô∏è‚É£ Analyzing 'Gold':")
result_gold = predict_sentiment(
    entity="Gold",
    headline=headline_opposing
)

print("\n2Ô∏è‚É£ Analyzing 'Silver':")
result_silver = predict_sentiment(
    entity="Silver",
    headline=headline_opposing
)

print("\n" + "="*80)
print("üìä COMPARISON:")
print("="*80)

# Build both comparison rows from one template instead of two copied dicts.
comparison_df = pd.DataFrame([
    {
        'Entity': entity_name,
        'Sentiment': entity_result['sentiment'].upper(),
        'Confidence': f"{entity_result['confidence']:.1%}",
        'Negative': f"{entity_result['probabilities']['negative']:.1%}",
        'Neutral': f"{entity_result['probabilities']['neutral']:.1%}",
        'Positive': f"{entity_result['probabilities']['positive']:.1%}"
    }
    for entity_name, entity_result in (('Gold', result_gold), ('Silver', result_silver))
])

display(comparison_df)

# Only claim success when the predictions really are one positive and one
# negative; otherwise report what the model returned.
if {result_gold['sentiment'], result_silver['sentiment']} == {'positive', 'negative'}:
    print("\n‚ú® Key Insight:")
    print("   Model successfully detects OPPOSING sentiments (Positive vs Negative)")
    print("   in the SAME headline! This is the power of Aspect-Based approach.")
else:
    print("\nNote:")
    print("   Model did not assign opposing sentiments to the two entities")
    print(f"   (Gold: {result_gold['sentiment']}, Silver: {result_silver['sentiment']}).")


🌟 DEMO 2: Opposing Sentiments in Same Headline (ABSA Power!)

Scenario: Multiple entities dengan sentimen BERBEDA dalam satu headline

💡 This is what makes ABSA special! Traditional sentiment analysis fails here.

📰 Headline: "Gold shines on seasonal demand; Silver dull"
🎯 Entities: Gold, Silver

--------------------------------------------------------------------------------

1️⃣ Analyzing 'Gold':



2️⃣ Analyzing 'Silver':



📊 COMPARISON:


Unnamed: 0,Entity,Sentiment,Confidence,Negative,Neutral,Positive
0,Gold,POSITIVE,97.3%,1.6%,1.1%,97.3%
1,Silver,NEGATIVE,70.0%,70.0%,28.1%,1.8%



✨ Key Insight:
   Model successfully detects OPPOSING sentiments (Positive vs Negative)
   in the SAME headline! This is the power of Aspect-Based approach.


# Demo 3 - Batch Analysis

In [6]:
print("\n" + "="*80)
print("üìã DEMO 3: Batch Analysis - Multiple Examples")
print("="*80)
print("\nScenario: Analyzing multiple headlines at once\n")

# (entity, headline) pairs; the first two share a headline on purpose.
test_cases = [
    ("Apple", "Apple soars as Samsung plunges in smartphone market"),
    ("Samsung", "Apple soars as Samsung plunges in smartphone market"),
    ("Nifty", "Nifty hits new high on strong FII inflows"),
    ("Wipro", "At Wipro, growth remains a mirage"),
    ("Market", "Market seeing patience, if not conviction"),
    ("PSU banks", "Near-term trajectory of PSU banks shaky, but buy stocks on dips"),
]

results = []       # display rows (formatted strings)
confidences = []   # raw float confidences, kept for exact statistics

for entity, headline in test_cases:
    result = predict_sentiment(entity, headline, verbose=False)
    confidences.append(result['confidence'])
    results.append({
        'Entity': entity,
        # Shorten long headlines so the table stays compact
        'Headline': (headline[:50] + "...") if len(headline) > 50 else headline,
        'Sentiment': result['sentiment'].upper(),
        'Confidence': f"{result['confidence']:.1%}"
    })

results_df = pd.DataFrame(results)

print("Results:")
print("-" * 80)
display(results_df)

print("\nüìä Summary Statistics:")
print(f"   Total Predictions: {len(results)}")
print(f"   Positive: {sum(1 for r in results if r['Sentiment'] == 'POSITIVE')}")
print(f"   Neutral: {sum(1 for r in results if r['Sentiment'] == 'NEUTRAL')}")
print(f"   Negative: {sum(1 for r in results if r['Sentiment'] == 'NEGATIVE')}")

# Average the raw confidences rather than re-parsing the rounded percentage
# strings, and avoid dividing by zero if test_cases is emptied.
if confidences:
    avg_conf = sum(confidences) / len(confidences)
    print(f"   Average Confidence: {avg_conf:.1%}")
else:
    avg_conf = None
    print("   Average Confidence: n/a")


📋 DEMO 3: Batch Analysis - Multiple Examples

Scenario: Analyzing multiple headlines at once

Results:
--------------------------------------------------------------------------------


Unnamed: 0,Entity,Headline,Sentiment,Confidence
0,Apple,Apple soars as Samsung plunges in smartphone m...,POSITIVE,96.6%
1,Samsung,Apple soars as Samsung plunges in smartphone m...,NEGATIVE,98.0%
2,Nifty,Nifty hits new high on strong FII inflows,POSITIVE,97.0%
3,Wipro,"At Wipro, growth remains a mirage",NEGATIVE,97.8%
4,Market,"Market seeing patience, if not conviction",NEUTRAL,96.5%
5,PSU banks,"Near-term trajectory of PSU banks shaky, but b...",NEGATIVE,98.8%



📊 Summary Statistics:
   Total Predictions: 6
   Positive: 2
   Neutral: 1
   Negative: 3
   Average Confidence: 97.4%
