In [None]:
# RoBERTa-based (BERT-family) Sentiment Analysis in Google Colab
# ------------------------------------------------------
# This script:
# 1. Installs Hugging Face Transformers and Torch, and imports the Transformers helpers it needs
# 2. Uses a pre-trained RoBERTa model (a BERT variant) for 3-way sentiment: positive, neutral, negative
# 3. Tokenizes the input into subword tokens for display
# 4. Classifies sentiment and returns the label plus per-class confidence scores

# Install dependencies
!pip install -q transformers torch

from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# Choose a 3-class sentiment model (Negative, Neutral, Positive).
# As the checkpoint name indicates, this is a RoBERTa model.
MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment"

# Load the tokenizer and classification model for that checkpoint
# (downloaded from the Hugging Face Hub on first use).
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# Create a sentiment-analysis pipeline that reports a score for every class.
# `top_k=None` is the documented replacement for the deprecated
# `return_all_scores=True`.
# NOTE(review): with `top_k` set, entries are expected to come back sorted by
# descending score rather than by label index -- confirm against the installed
# transformers version if downstream code relies on ordering.
sentiment_pipe = pipeline(
    "sentiment-analysis",
    model=model,
    tokenizer=tokenizer,
    top_k=None,
)


def analyze_sentence(sentence: str) -> dict:
    """
    Tokenize a sentence and classify its sentiment with the module-level pipeline.

    Args:
        sentence: Text to analyze.

    Returns:
        A dict with:
          - 'tokens': list of subword tokens from ``tokenizer.tokenize``
          - 'scores': dict mapping each sentiment label to its score,
            rounded to 4 decimal places
          - 'label': the label with the highest (unrounded) score
    """
    # Map the model's generic class names to human-readable form:
    # 0 -> Negative, 1 -> Neutral, 2 -> Positive
    label_map = {"LABEL_0": "negative", "LABEL_1": "neutral", "LABEL_2": "positive"}

    # Tokenize into subword tokens (returned for inspection only)
    tokens = tokenizer.tokenize(sentence)

    # Run sentiment pipeline
    output = sentiment_pipe(sentence)
    # For one input the expected shape is a nested list:
    #   [[{'label': 'LABEL_2', 'score': 0.85}, ...]]
    # Also accept an un-nested [{...}, ...] so a differently configured
    # pipeline does not break the lookup below.
    if output and isinstance(output[0], dict):
        all_scores = output
    else:
        all_scores = output[0]

    # Iterate in raw-label order so the result dict has a stable key order
    # (negative, neutral, positive) regardless of how the pipeline sorts.
    # Labels missing from label_map are passed through unchanged instead of
    # raising KeyError.
    raw_scores = {
        label_map.get(item['label'], item['label']): item['score']
        for item in sorted(all_scores, key=lambda item: item['label'])
    }

    # Pick the winner from the unrounded scores: rounding first can make two
    # close classes compare equal and select the wrong one.
    final_label = max(raw_scores, key=raw_scores.get)

    # Round only for presentation
    scores = {label: round(score, 4) for label, score in raw_scores.items()}

    return {
        'tokens': tokens,
        'scores': scores,
        'label': final_label
    }

if __name__ == '__main__':
    # Interactive loop: read a sentence, print its tokens, per-class scores
    # and predicted sentiment, until the user types 'quit'.
    while True:
        try:
            sentence = input("Enter a sentence (or type 'quit' to exit): ")
        except (EOFError, KeyboardInterrupt):
            # Input stream closed or the user interrupted the prompt:
            # leave the loop cleanly instead of ending with a traceback.
            print()
            break

        # Surrounding whitespace is ignored only for the command/blank checks;
        # the sentence itself is analyzed exactly as typed.
        command = sentence.strip()
        if not command:
            # Nothing to classify; ask again.
            continue
        if command.lower() == 'quit':
            break

        result = analyze_sentence(sentence)

        print(f"Subword tokens: {result['tokens']}")
        print(f"Confidence scores: {result['scores']}")
        print(f"Predicted sentiment: {result['label']}\n")

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m103.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m81.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m31.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m18.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

config.json:   0%|          | 0.00/747 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

Device set to use cpu


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Enter a sentence (or type 'quit' to exit): i hate you
Subword tokens: ['i', 'Ġhate', 'Ġyou']
Confidence scores: {'negative': 0.9649, 'neutral': 0.0288, 'positive': 0.0063}
Predicted sentiment: negative

Enter a sentence (or type 'quit' to exit): exit
Subword tokens: ['exit']
Confidence scores: {'negative': 0.2498, 'neutral': 0.5872, 'positive': 0.163}
Predicted sentiment: neutral

Enter a sentence (or type 'quit' to exit): quit
