# BERT Sentiment Analysis in Jupyter Notebook
## Complete Working Example with Real Text Samples

In [16]:
# Install required packages (run once)
# !pip install transformers torch

from transformers import AutoTokenizer
import os

# 1. Specify the correct model name
model_name = "nlptown/bert-base-multilingual-uncased-sentiment"

# Set to True only to recover from a corrupted cache; leaving it on makes every
# run re-download the tokenizer files and fails when offline.
FORCE_DOWNLOAD = False

# 2. Check for local directory conflicts
# from_pretrained() treats its argument as a local path when a directory with
# that exact relative path exists, so the full "org/name" path is what must be
# checked (a directory that merely shares the model's basename is harmless).
if os.path.isdir(model_name):
    raise ValueError(f"Local directory '{model_name}' already exists and may cause conflicts!")

# 3. Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    cache_dir="./custom_cache",     # Custom cache directory
    force_download=FORCE_DOWNLOAD   # Re-download only when explicitly requested
)
print("Tokenizer loaded successfully!")

Tokenizer loaded successfully!


In [17]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch

# 1. Load the pre-trained classification model and its matching tokenizer.
# `model_name` is defined in the tokenizer-setup cell above, so that cell must
# be run first. Note that `tokenizer` is re-bound here to a BertTokenizer.
model = BertForSequenceClassification.from_pretrained(model_name)
tokenizer = BertTokenizer.from_pretrained(model_name)

In [18]:
# 2. Define test texts - TRY CHANGING THESE!
# Every entry ends with a comma: adjacent string literals without one are
# silently concatenated into a single item.
test_texts = [
    "The hotel room was clean and spacious, but the staff was rude.",  # Mixed
    "This is the worst product I've ever bought!",  # Clearly negative
    "Absolutely fantastic experience from start to finish!",  # Clearly positive
    "It's okay, nothing special.",  # Neutral
    "这家餐厅的服务太糟糕了，但食物还不错",  # Chinese mixed review
    "Ce film était incroyable! Je le recommande.",  # French positive
    "とてもいい.",  # Japanese positive
]

In [19]:
# 3. Sentiment analysis function
def analyze_sentiment(text):
    """Classify a single text, print a report, and return the result.

    Relies on the notebook-level `tokenizer` and `model` loaded in the cells
    above.

    Args:
        text: One string to classify. Inputs longer than 512 tokens are
            truncated by the tokenizer call.

    Returns:
        dict with the keys:
            "text": the input string,
            "predicted_class": index of the most probable class,
            "label": human-readable star label for that class,
            "probabilities": list with one float per output class,
            "verdict": "Positive" or "Negative".

    Raises:
        TypeError: if `text` is not a str. A list of texts would be tokenized
            as a batch, but this function reports on exactly one row.
    """
    if not isinstance(text, str):
        raise TypeError(
            f"analyze_sentiment expects a single str, got {type(text).__name__}"
        )

    # Tokenize input
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)

    # Get model predictions (no gradients needed for inference)
    with torch.no_grad():
        outputs = model(**inputs)

    # Convert logits to probabilities over the class dimension
    probs = torch.softmax(outputs.logits, dim=1)
    # Take the argmax of the single row explicitly instead of over the
    # flattened tensor, so the index is always a class index.
    predicted_class = torch.argmax(probs[0]).item()

    # Rating labels (1-5 stars), keyed by class index
    rating_labels = {
        0: "⭐ (Very negative)",
        1: "⭐⭐ (Negative)",
        2: "⭐⭐⭐ (Neutral)",
        3: "⭐⭐⭐⭐ (Positive)",
        4: "⭐⭐⭐⭐⭐ (Very positive)"
    }

    # Display results
    print(f"\nText: '{text}'")
    print(f"Predicted sentiment: {rating_labels[predicted_class]}")
    print("Probability distribution:")
    for i, prob in enumerate(probs[0]):
        print(f"{rating_labels[i]}: {prob*100:.1f}%")

    # Binary classification: 4-5 stars count as Positive, everything else as Negative.
    # NOTE(review): a 3-star (Neutral) prediction is therefore reported as
    # "Negative" - confirm this is intended before relying on the verdict.
    verdict = 'Positive' if predicted_class >= 3 else 'Negative'
    print(f"\nFinal verdict: {verdict}")
    print("="*60)

    return {
        "text": text,
        "predicted_class": predicted_class,
        "label": rating_labels[predicted_class],
        "probabilities": probs[0].tolist(),
        "verdict": verdict,
    }

In [20]:
# 4. Print a sentiment report for each sample in `test_texts`, in list order
for sample_text in test_texts:
    analyze_sentiment(sample_text)


Text: 'The hotel room was clean and spacious, but the staff was rude.'
Predicted sentiment: ⭐⭐⭐ (Neutral)
Probability distribution:
⭐ (Very negative): 5.5%
⭐⭐ (Negative): 30.2%
⭐⭐⭐ (Neutral): 48.7%
⭐⭐⭐⭐ (Positive): 14.1%
⭐⭐⭐⭐⭐ (Very positive): 1.5%

Final verdict: Negative

Text: 'This is the worst product I've ever bought!'
Predicted sentiment: ⭐ (Very negative)
Probability distribution:
⭐ (Very negative): 97.2%
⭐⭐ (Negative): 2.5%
⭐⭐⭐ (Neutral): 0.2%
⭐⭐⭐⭐ (Positive): 0.0%
⭐⭐⭐⭐⭐ (Very positive): 0.1%

Final verdict: Negative

Text: 'Absolutely fantastic experience from start to finish!'
Predicted sentiment: ⭐⭐⭐⭐⭐ (Very positive)
Probability distribution:
⭐ (Very negative): 0.1%
⭐⭐ (Negative): 0.1%
⭐⭐⭐ (Neutral): 0.2%
⭐⭐⭐⭐ (Positive): 2.8%
⭐⭐⭐⭐⭐ (Very positive): 96.8%

Final verdict: Positive

Text: 'It's okay, nothing special.'
Predicted sentiment: ⭐⭐⭐ (Neutral)
Probability distribution:
⭐ (Very negative): 0.6%
⭐⭐ (Negative): 10.6%
⭐⭐⭐ (Neutral): 84.7%
⭐⭐⭐⭐ (Positive): 4.0%
⭐⭐⭐⭐⭐ (Very positive): … [remaining output truncated]

## How to Use This Notebook
1. Run all cells sequentially (Shift+Enter)
2. To analyze your own text:
   - Edit the `test_texts` list in the third code cell ("2. Define test texts")
   - Re-run that cell and the fifth code cell ("4. Run analysis on all test texts")
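3. The model was fine-tuned on product reviews in six languages (English, Dutch, German, French, Spanish, and Italian). Text in other languages, such as the Chinese and Japanese samples, is still accepted by the multilingual tokenizer, but its predictions may be less reliable.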

## Sample Output Preview
```
Text: 'The hotel room was clean and spacious, but the staff was rude.'
Predicted sentiment: ⭐⭐⭐ (Neutral)
Probability distribution:
⭐ (Very negative): 5.5%
⭐⭐ (Negative): 30.2%
⭐⭐⭐ (Neutral): 48.7%
⭐⭐⭐⭐ (Positive): 14.1%
⭐⭐⭐⭐⭐ (Very positive): 1.5%

Final verdict: Negative
============================================================
```