### **State University of Campinas - UNICAMP** <br>
**Course**: MC886A <br>
**Professor**: Marcelo da Silva Reis <br>
**TA (PED)**: Marcos Vinicius Souza Freire

---

### **Hands-On: Transformers and Attention Mechanisms**
##### Notebook: 02 Sentiment Analysis with Transformers
---

**Objectives:** Let's analyze the sentiment of movie watchers' reviews from the IMDB dataset.

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
from torch.nn.utils.rnn import pad_sequence
from collections import Counter
import re
import os
import requests
import tarfile
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from tqdm import tqdm
import time

In [2]:
# Set device: use CUDA when PyTorch reports an available GPU, otherwise the CPU.
# `device` is a module-level global read by collate_fn, visualize_attention and main.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [3]:
# ====================== Data Preparation ======================
class IMDBDataset(Dataset):
    """Map-style dataset that turns raw review strings into token-id tensors.

    Every item is a pair ``(token_ids, label)``: ``token_ids`` is a 1-D
    ``torch.long`` tensor holding at most ``max_length`` vocabulary indices and
    ``label`` is a scalar ``torch.float`` tensor.
    """

    def __init__(self, texts, labels, vocab, max_length=512):
        # texts and labels are indexed in parallel; vocab maps token -> index
        # and is queried with the '<unk>' key for out-of-vocabulary tokens.
        self.texts, self.labels = texts, labels
        self.vocab = vocab
        self.max_length = max_length

    def __len__(self):
        """Number of reviews held by the dataset."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenise review ``idx`` (lower-cased word characters) and encode it."""
        words = re.findall(r'\b\w+\b', self.texts[idx].lower())
        kept_words = words[:self.max_length]

        # The '<unk>' index is looked up per token (not hoisted) so an empty
        # review never touches the vocabulary at all.
        encoded = list(map(lambda w: self.vocab.get(w, self.vocab['<unk>']), kept_words))

        ids_tensor = torch.tensor(encoded, dtype=torch.long)
        label_tensor = torch.tensor(self.labels[idx], dtype=torch.float)
        return ids_tensor, label_tensor

In [4]:
def collate_fn(batch):
    """Collate ``(token_ids, label)`` samples into padded batch tensors.

    Returns a 3-tuple, each element moved to the module-level ``device``:
    the token ids right-padded with 0 to the longest sequence in the batch,
    a float mask that is 1.0 wherever the padded id is non-zero, and the
    stacked labels.
    """
    sequences, targets = zip(*batch)
    token_batch = pad_sequence(sequences, batch_first=True, padding_value=0)
    # Index 0 is the padding value, so "non-zero" marks the real tokens.
    token_mask = token_batch.ne(0).float()
    label_batch = torch.stack(targets)
    return token_batch.to(device), token_mask.to(device), label_batch.to(device)

def download_imdb_data():
    """Make sure the IMDB review dataset is available in the working directory.

    If a path named ``aclImdb`` already exists nothing is downloaded.
    Otherwise the archive is streamed to ``aclImdb_v1.tar.gz``, extracted into
    the current directory and the archive file is removed again (also when the
    download or extraction fails, so no truncated archive is left behind).

    Returns:
        The relative directory name ``"aclImdb"``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection problems or timeouts.
        tarfile.TarError: if the downloaded archive cannot be extracted.
    """
    url = "https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"
    filename = "aclImdb_v1.tar.gz"
    if not os.path.exists("aclImdb"):
        print("Downloading IMDB dataset...")
        try:
            # The context manager releases the streamed connection; the timeout
            # keeps a stalled server from hanging the notebook forever.
            with requests.get(url, stream=True, timeout=60) as response:
                # Fail loudly instead of writing an HTML error page to disk
                # and handing it to tarfile.
                response.raise_for_status()
                with open(filename, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=1 << 20):
                        if chunk:
                            f.write(chunk)
            with tarfile.open(filename, 'r:gz') as tar:
                # Use the safe "data" extraction filter where the running
                # Python provides it; older versions fall back to the default.
                if hasattr(tarfile, "data_filter"):
                    tar.extractall(filter="data")
                else:
                    tar.extractall()
        finally:
            if os.path.exists(filename):
                os.remove(filename)
    return "aclImdb"

def load_imdb_data(vocab_size=10000):
    """Load the IMDB training reviews and build a frequency-based vocabulary.

    Args:
        vocab_size: Maximum vocabulary size including the two special entries
            ``'<pad>'`` (index 0) and ``'<unk>'`` (index 1).

    Returns:
        A tuple ``(texts, labels, vocab, token_counter)``: the raw review
        strings (all positive reviews first, then all negative ones), their
        labels (1 = positive, 0 = negative), the token -> index mapping and
        the ``Counter`` with the raw token frequencies.
    """
    data_dir = download_imdb_data()
    texts, labels = [], []

    review_sources = (
        ("pos", 1, "Loading positive reviews"),
        ("neg", 0, "Loading negative reviews"),
    )
    for subdir, label, desc in review_sources:
        review_dir = os.path.join(data_dir, "train", subdir)
        # os.listdir returns entries in arbitrary order. Sorting keeps the
        # sample order (and therefore the seeded train/validation split and
        # the tie-breaking among equally frequent tokens) reproducible.
        for filename in tqdm(sorted(os.listdir(review_dir)), desc=desc):
            with open(os.path.join(review_dir, filename), 'r', encoding='utf-8') as f:
                texts.append(f.read())
            labels.append(label)

    # Build vocabulary; the pattern is compiled once instead of per review.
    token_pattern = re.compile(r'\b\w+\b')
    token_counter = Counter()
    for text in tqdm(texts, desc="Building vocabulary"):
        token_counter.update(token_pattern.findall(text.lower()))

    # Indices 0 and 1 are reserved, so real tokens start at 2.
    vocab = {'<pad>': 0, '<unk>': 1}
    for idx, (token, _count) in enumerate(token_counter.most_common(vocab_size - 2), start=2):
        vocab[token] = idx

    return texts, labels, vocab, token_counter

def load_imdb_test_data(vocab):
    """Load the official IMDB test reviews from the local ``aclImdb`` folder.

    The dataset is expected to be on disk already; this function does not
    download it.

    Args:
        vocab: Not used by this function; kept so existing callers that pass
            the vocabulary keep working.

    Returns:
        A tuple ``(texts, labels)`` with the raw review strings (all positive
        reviews first, then all negative ones) and their labels
        (1 = positive, 0 = negative).
    """
    data_dir = "aclImdb"
    texts, labels = [], []

    review_sources = (
        ("pos", 1, "Loading test positive reviews"),
        ("neg", 0, "Loading test negative reviews"),
    )
    for subdir, label, desc in review_sources:
        review_dir = os.path.join(data_dir, "test", subdir)
        # os.listdir returns entries in arbitrary order; sort so the sample
        # order (and anything reported per index) is the same on every run.
        for filename in tqdm(sorted(os.listdir(review_dir)), desc=desc):
            with open(os.path.join(review_dir, filename), 'r', encoding='utf-8') as f:
                texts.append(f.read())
            labels.append(label)

    return texts, labels

In [5]:
def train_val_split(texts, labels, val_size=0.2, random_seed=42):
    """Shuffle the samples with a fixed seed and cut them into two parts.

    The global PyTorch and NumPy random generators are both seeded with
    ``random_seed``; the permutation itself is drawn with NumPy.

    Args:
        texts: Sequence of samples.
        labels: Sequence of labels, indexed in parallel with ``texts``.
        val_size: Fraction of the samples placed in the validation part.
        random_seed: Seed applied to both random generators.

    Returns:
        ``(train_texts, val_texts, train_labels, val_labels)`` as lists.
    """
    torch.manual_seed(random_seed)
    np.random.seed(random_seed)

    n_samples = len(texts)
    order = list(range(n_samples))
    np.random.shuffle(order)

    # The first (1 - val_size) share of the shuffled order is the training part.
    cut = int(n_samples * (1 - val_size))
    train_order, val_order = order[:cut], order[cut:]

    def pick(sequence, positions):
        return [sequence[pos] for pos in positions]

    return (
        pick(texts, train_order),
        pick(texts, val_order),
        pick(labels, train_order),
        pick(labels, val_order),
    )

In [6]:
# ====================== Transformer Components ======================
class PositionalEncoding(nn.Module):
    """Adds fixed sine/cosine position signals to a batch of embeddings.

    Even feature indices hold ``sin(pos * w_k)`` and odd indices hold
    ``cos(pos * w_k)`` with ``w_k = exp(-ln(10000) * 2k / d_model)``. The table
    is stored as a non-trainable buffer of shape ``(1, max_len, d_model)``.

    Args:
        d_model: Embedding width. Odd widths are supported.
        max_len: Longest sequence the table covers; ``forward`` cannot be
            applied to inputs with more positions than this.
    """

    def __init__(self, d_model, max_len=512):
        super().__init__()
        position = torch.arange(max_len).unsqueeze(1)
        # More stable calculation of div_term
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-torch.log(torch.tensor(10000.0)) / d_model))
        angles = position * div_term
        pe = torch.zeros(1, max_len, d_model)
        pe[0, :, 0::2] = torch.sin(angles)
        # For an odd d_model there is one fewer odd column than even columns,
        # so drop the surplus frequency instead of failing on a shape mismatch.
        pe[0, :, 1::2] = torch.cos(angles[:, :d_model // 2])
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``x`` plus the encodings for its first ``x.size(1)`` positions."""
        return x + self.pe[:, :x.size(1)]

In [7]:
class MultiHeadAttention(nn.Module):
    """Multi-head scaled dot-product self-attention with a key padding mask.

    Args:
        d_model: Width of the input and output features.
        num_heads: Number of attention heads; must divide ``d_model``.

    Raises:
        ValueError: if ``d_model`` is not divisible by ``num_heads``.
    """

    def __init__(self, d_model, num_heads):
        super().__init__()
        # Without this check the mismatch only surfaces later as an opaque
        # reshape error inside forward().
        if d_model % num_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by num_heads ({num_heads})"
            )
        self.d_model = d_model
        self.num_heads = num_heads
        self.head_dim = d_model // num_heads

        self.wq = nn.Linear(d_model, d_model)
        self.wk = nn.Linear(d_model, d_model)
        self.wv = nn.Linear(d_model, d_model)
        self.wo = nn.Linear(d_model, d_model)

    def forward(self, x, attention_mask):
        """Attend over ``x`` while ignoring masked key positions.

        Args:
            x: Tensor of shape ``[B, T, d_model]``.
            attention_mask: Tensor of shape ``[B, T]``; positions equal to 0
                are excluded as attention keys.

        Returns:
            ``(output, attn_weights)`` with shapes ``[B, T, d_model]`` and
            ``[B, num_heads, T, T]``.
        """
        batch_size, seq_len, _ = x.shape

        def split_heads(projected):
            # [B, T, d_model] -> [B, num_heads, T, head_dim]
            return projected.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)

        # Linear projections
        Q = split_heads(self.wq(x))
        K = split_heads(self.wk(x))
        V = split_heads(self.wv(x))

        # Scaled dot-product attention
        scores = torch.matmul(Q, K.transpose(-2, -1)) / (self.head_dim ** 0.5)

        # Apply attention mask. The fill value is the most negative number the
        # score dtype can represent, so this also works in half precision
        # (a literal -1e9 does not fit into float16).
        key_mask = attention_mask.unsqueeze(1).unsqueeze(1)  # [B, 1, 1, T]
        scores = scores.masked_fill(key_mask == 0, torch.finfo(scores.dtype).min)

        attn_weights = torch.softmax(scores, dim=-1)
        attn_output = torch.matmul(attn_weights, V)

        # Concatenate heads
        attn_output = attn_output.transpose(1, 2).contiguous().view(batch_size, seq_len, -1)

        # Final linear projection
        return self.wo(attn_output), attn_weights

In [8]:
class TransformerBlock(nn.Module):
    """One encoder layer: self-attention followed by a two-layer feed-forward net.

    Each sub-layer is wrapped in a residual connection; dropout is applied to
    the sub-layer output before the addition and layer normalisation is
    applied after it.
    """

    def __init__(self, d_model, num_heads, ff_dim, dropout=0.1):
        super().__init__()
        self.attention = MultiHeadAttention(d_model, num_heads)
        self.norm1 = nn.LayerNorm(d_model)
        widen = nn.Linear(d_model, ff_dim)
        narrow = nn.Linear(ff_dim, d_model)
        self.ff = nn.Sequential(widen, nn.ReLU(), narrow)
        self.norm2 = nn.LayerNorm(d_model)
        # A single dropout module is shared by both residual branches.
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, attention_mask):
        """Return the transformed features and this layer's attention weights."""
        # Attention sub-layer
        context, weights = self.attention(x, attention_mask)
        attended = self.norm1(x + self.dropout(context))

        # Position-wise feed-forward sub-layer
        projected = self.ff(attended)
        result = self.norm2(attended + self.dropout(projected))
        return result, weights

In [9]:
class TransformerClassifier(nn.Module):
    """Transformer encoder with masked mean pooling and a sigmoid output.

    ``forward`` returns one probability per sequence plus the attention
    weights of every layer.
    """

    def __init__(self, vocab_size, d_model, num_heads, ff_dim, num_layers, dropout=0.1):
        super().__init__()
        # Index 0 is declared as the padding index of the embedding table.
        self.embedding = nn.Embedding(vocab_size, d_model, padding_idx=0)
        self.pos_encoding = PositionalEncoding(d_model)
        self.layers = nn.ModuleList(
            TransformerBlock(d_model, num_heads, ff_dim, dropout)
            for _ in range(num_layers)
        )
        # NOTE: `pooler` is created (and counted among the parameters) but is
        # not referenced in forward().
        self.pooler = nn.Linear(d_model, d_model)
        self.classifier = nn.Linear(d_model, 1)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input_ids, attention_mask):
        """Classify a batch of token-id sequences.

        Returns:
            ``(probabilities, all_attentions)``: the sigmoid of one logit per
            sequence, and a list with the attention weights of each layer.
        """
        # Token embeddings with position information added
        hidden = self.pos_encoding(self.embedding(input_ids))

        # Run the encoder stack, keeping every layer's attention map
        all_attentions = []
        for block in self.layers:
            hidden, layer_weights = block(hidden, attention_mask)
            all_attentions.append(layer_weights)

        # Masked mean over the sequence axis so padded positions do not
        # contribute; the clamp avoids dividing by zero for an all-zero mask.
        keep = attention_mask.unsqueeze(-1).expand(hidden.size()).float()
        token_sum = (hidden * keep).sum(1)
        token_count = keep.sum(1).clamp(min=1e-9)
        sentence_vector = token_sum / token_count

        # Single-logit head squashed to a probability
        logits = self.classifier(self.dropout(sentence_vector)).squeeze(-1)
        return torch.sigmoid(logits), all_attentions

In [10]:
# ====================== Training & Evaluation ======================
def train_epoch(model, dataloader, optimizer, criterion):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        model: Called as ``model(inputs, masks)``; must return
            ``(probabilities, extra)``.
        dataloader: Yields ``(inputs, masks, labels)`` batches.
        optimizer: Optimizer stepping the model parameters.
        criterion: Loss taking ``(outputs, labels)``; assumed to return the
            mean loss over the batch.

    Returns:
        ``(avg_loss, accuracy, batch_losses)``: the per-sample average loss,
        the fraction of predictions (threshold 0.5) matching the labels, and
        the list of per-batch loss values. Both averages are 0 when the
        dataloader yields no samples.
    """
    model.train()
    loss_sum, total_correct, total_seen = 0.0, 0, 0
    batch_losses = []

    for inputs, masks, labels in tqdm(dataloader, desc="Training"):
        optimizer.zero_grad()
        outputs, _ = model(inputs, masks)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Read the scalar once (each .item() forces a device sync).
        batch_loss = loss.item()
        batch_size = labels.size(0)
        batch_losses.append(batch_loss)

        # Weight by batch size so a smaller final batch does not skew the
        # epoch average.
        loss_sum += batch_loss * batch_size
        total_seen += batch_size

        predictions = (outputs > 0.5).float()
        total_correct += (predictions == labels).sum().item()

    # Divide by the samples actually processed instead of assuming the loader
    # covered the whole dataset; guard against an empty loader.
    denominator = max(total_seen, 1)
    avg_loss = loss_sum / denominator
    accuracy = total_correct / denominator
    return avg_loss, accuracy, batch_losses

def evaluate(model, dataloader, criterion):
    """Evaluate ``model`` on ``dataloader`` without computing gradients.

    Args:
        model: Called as ``model(inputs, masks)``; must return
            ``(probabilities, extra)``.
        dataloader: Yields ``(inputs, masks, labels)`` batches.
        criterion: Loss taking ``(outputs, labels)``; assumed to return the
            mean loss over the batch.

    Returns:
        ``(avg_loss, accuracy, all_predictions, all_labels)``: the per-sample
        average loss, the fraction of predictions (threshold 0.5) matching the
        labels, and flat lists with every model output and every label.
        Both averages are 0 when the dataloader yields no samples.
    """
    model.eval()
    loss_sum, total_correct, total_seen = 0.0, 0, 0
    all_predictions, all_labels = [], []

    with torch.no_grad():
        for inputs, masks, labels in tqdm(dataloader, desc="Evaluating"):
            outputs, _ = model(inputs, masks)
            loss = criterion(outputs, labels)

            # Weight by batch size so a smaller final batch does not skew the
            # average.
            batch_size = labels.size(0)
            loss_sum += loss.item() * batch_size
            total_seen += batch_size

            predictions = (outputs > 0.5).float()
            total_correct += (predictions == labels).sum().item()

            all_predictions.extend(outputs.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Divide by the samples actually processed; guard against an empty loader.
    denominator = max(total_seen, 1)
    avg_loss = loss_sum / denominator
    accuracy = total_correct / denominator
    return avg_loss, accuracy, all_predictions, all_labels

In [11]:
# ====================== Visualization Functions ======================
def plot_training_curves(train_losses, train_accs, val_losses, val_accs):
    """Show per-epoch loss (left panel) and accuracy (right panel) curves.

    Each argument is a sequence with one value per epoch; epochs are numbered
    from 1 on the x-axis.
    """
    epochs = list(range(1, len(train_losses) + 1))

    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=('Loss Curves', 'Accuracy Curves'),
        x_title='Epoch',
        y_title='Value'
    )

    # (values, legend name, line colour, subplot column), in drawing order.
    curve_specs = [
        (train_losses, 'Train Loss', 'blue', 1),
        (val_losses, 'Val Loss', 'red', 1),
        (train_accs, 'Train Acc', 'green', 2),
        (val_accs, 'Val Acc', 'orange', 2),
    ]
    for values, legend_name, colour, column in curve_specs:
        curve = go.Scatter(
            x=epochs,
            y=values,
            mode='lines+markers',
            name=legend_name,
            line=dict(color=colour),
        )
        fig.add_trace(curve, row=1, col=column)

    fig.update_layout(height=400, showlegend=True, title_text="Training Progress")
    fig.show()

def plot_prediction_distribution(predictions, labels, title="Validation Prediction Analysis"):
    """Show how prediction scores are distributed relative to the true labels.

    Left panel: histograms of the scores, split by true label (1 vs 0).
    Right panel: the scores sorted in ascending order, with the matching true
    labels overlaid as markers.
    """
    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=(f'{title} - Distribution', f'{title} - Sorted Scores'),
    )

    # Split the scores by true label in a single pass.
    pos_scores, neg_scores = [], []
    for score, target in zip(predictions, labels):
        if target == 1:
            pos_scores.append(score)
        if target == 0:
            neg_scores.append(score)

    for scores, legend_name in ((pos_scores, 'Positive'), (neg_scores, 'Negative')):
        fig.add_trace(
            go.Histogram(x=scores, name=legend_name, opacity=0.7, nbinsx=50),
            row=1, col=1
        )

    # Order samples by score and reorder the labels the same way.
    ranking = np.argsort(predictions)
    ranked_scores = np.array(predictions)[ranking]
    ranked_labels = np.array(labels)[ranking]

    score_trace = go.Scatter(
        x=list(range(len(ranked_scores))), y=ranked_scores,
        mode='lines', name='Prediction Scores'
    )
    label_trace = go.Scatter(
        x=list(range(len(ranked_labels))), y=ranked_labels,
        mode='markers', name='True Labels', opacity=0.3
    )
    fig.add_trace(score_trace, row=1, col=2)
    fig.add_trace(label_trace, row=1, col=2)

    fig.update_layout(height=400, showlegend=True, title_text=title)
    fig.show()

def visualize_attention(text, model, vocab, max_length=128):
    """Plot the first layer's first-head attention weights for ``text``.

    Args:
        text: Raw input string; it is lower-cased and split into word tokens.
        model: Called as ``model(input_ids, attention_mask)``; must return
            ``(output, all_attentions)`` where ``all_attentions[0]`` is indexed
            as ``[batch, head, query, key]``.
        vocab: Token -> index mapping containing the ``'<unk>'`` key.
        max_length: Maximum number of tokens taken from ``text``.

    Returns:
        The Plotly figure (which is also shown).

    Raises:
        ValueError: if ``text`` contains no tokens.
    """
    # Tokenize input
    tokens = re.findall(r'\b\w+\b', text.lower())[:max_length]
    if not tokens:
        raise ValueError("text contains no tokens to visualize")
    token_ids = [vocab.get(token, vocab['<unk>']) for token in tokens]
    input_ids = torch.tensor([token_ids], dtype=torch.long).to(device)
    attention_mask = (input_ids != 0).float().to(device)

    # Get model outputs
    model.eval()
    with torch.no_grad():
        _, all_attentions = model(input_ids, attention_mask)

    # Get attention from first layer, first head
    attn_weights = all_attentions[0][0, 0].cpu().numpy()
    valid_length = int(attention_mask[0].sum().item())
    attn_weights = attn_weights[:valid_length, :valid_length]
    tokens = tokens[:valid_length]

    # Plot against integer positions and show the tokens as tick labels.
    # Passing the token strings as coordinates would make the axes
    # categorical, where repeated tokens (e.g. "was", "the") share a single
    # row/column and hide each other's weights.
    positions = list(range(len(tokens)))

    # Create heatmap
    fig = go.Figure(data=go.Heatmap(
        z=attn_weights,
        x=positions,
        y=positions,
        colorscale='RdYlBu_r',
        hoverongaps=False,
        colorbar=dict(title="Attention Weight")
    ))

    fig.update_layout(
        title="Transformer Self-Attention (Layer 1, Head 1)",
        xaxis_title="Key Tokens",
        yaxis_title="Query Tokens",
        width=700,
        height=700,
        xaxis_tickangle=-45,
        font=dict(size=10)
    )
    fig.update_xaxes(tickmode='array', tickvals=positions, ticktext=tokens)
    fig.update_yaxes(tickmode='array', tickvals=positions, ticktext=tokens)

    print(f"\nAttention Statistics:")
    print(f"Max attention weight: {attn_weights.max():.4f}")
    print(f"Min attention weight: {attn_weights.min():.4f}")
    print(f"Mean attention weight: {attn_weights.mean():.4f}")

    fig.show()
    return fig

def plot_vocab_distribution(token_counter, top_k=50):
    """Show a bar chart of the ``top_k`` most frequent tokens.

    ``token_counter`` is a ``Counter`` of token frequencies; bars appear in
    descending frequency order as returned by ``most_common``.
    """
    words, frequencies = [], []
    for word, count in token_counter.most_common(top_k):
        words.append(word)
        frequencies.append(count)

    bars = go.Bar(x=words, y=frequencies)
    fig = go.Figure(data=bars)

    layout_options = dict(
        title=f"Top {top_k} Most Frequent Words",
        xaxis_title="Words",
        yaxis_title="Frequency",
        xaxis_tickangle=-45,
        height=500,
    )
    fig.update_layout(**layout_options)
    fig.show()

In [12]:
# ====================== Main Execution ======================
def main():
    """Run the full experiment: load data, train, evaluate and visualise.

    Steps, in order: load the IMDB training reviews and build the vocabulary,
    split them 80/20 into train/validation parts, load the test reviews,
    train a TransformerClassifier for NUM_EPOCHS epochs with Adam and BCELoss,
    plot the training curves and prediction distributions, report test
    metrics, show an attention heatmap for one sentence and print predictions
    for a few hand-written samples.
    """
    # Hyperparameters
    BATCH_SIZE = 32
    D_MODEL = 256
    NUM_HEADS = 8
    FF_DIM = 512
    NUM_LAYERS = 4
    NUM_EPOCHS = 5
    LEARNING_RATE = 3e-4
    VOCAB_SIZE = 10000

    # Load data
    print("Loading IMDB data...")
    train_texts, train_labels, vocab, token_counter = load_imdb_data(VOCAB_SIZE)

    # Seeded 80/20 train-validation split. train_val_split also seeds the
    # global torch/NumPy generators, which happens before the model is built.
    train_texts, val_texts, train_labels, val_labels = train_val_split(
        train_texts, train_labels, val_size=0.2, random_seed=42
    )

    # Load official test data
    test_texts, test_labels = load_imdb_test_data(vocab)

    print(f"Train samples: {len(train_texts)}")
    print(f"Validation samples: {len(val_texts)}")
    print(f"Test samples: {len(test_texts)}")

    # Visualize vocabulary distribution using actual frequencies
    plot_vocab_distribution(token_counter)

    # Create datasets and dataloaders
    train_dataset = IMDBDataset(train_texts, train_labels, vocab)
    val_dataset = IMDBDataset(val_texts, val_labels, vocab)
    test_dataset = IMDBDataset(test_texts, test_labels, vocab)

    # Only the training loader shuffles; validation/test keep dataset order.
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, collate_fn=collate_fn)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, collate_fn=collate_fn)

    # Initialize model
    model = TransformerClassifier(
        vocab_size=len(vocab),
        d_model=D_MODEL,
        num_heads=NUM_HEADS,
        ff_dim=FF_DIM,
        num_layers=NUM_LAYERS
    ).to(device)

    print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")

    # Training setup. BCELoss expects probabilities, which matches the
    # sigmoid applied at the end of TransformerClassifier.forward.
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    criterion = nn.BCELoss()

    # Training loop with tracking
    train_losses, train_accs = [], []
    val_losses, val_accs = [], []
    all_batch_losses = []

    print("\nStarting training...")
    start_time = time.time()

    for epoch in range(NUM_EPOCHS):
        epoch_start = time.time()

        train_loss, train_acc, batch_losses = train_epoch(model, train_loader, optimizer, criterion)
        # NOTE: val_labels is rebound here to the labels returned by evaluate
        # (the val_dataset built above keeps its own reference to the list).
        val_loss, val_acc, val_preds, val_labels = evaluate(model, val_loader, criterion)

        train_losses.append(train_loss)
        train_accs.append(train_acc)
        val_losses.append(val_loss)
        val_accs.append(val_acc)
        all_batch_losses.extend(batch_losses)

        epoch_time = time.time() - epoch_start

        print(f"\nEpoch {epoch+1}/{NUM_EPOCHS} ({epoch_time:.1f}s)")
        print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
        print(f"Val Loss:   {val_loss:.4f} | Val Acc:   {val_acc:.4f}")

    total_time = time.time() - start_time
    print(f"\nTotal training time: {total_time:.1f}s")

    # Plot training curves
    plot_training_curves(train_losses, train_accs, val_losses, val_accs)

    # Plot validation prediction distribution (predictions of the last epoch)
    plot_prediction_distribution(val_preds, val_labels, "Validation Set")

    # Evaluate on test set (test_labels is rebound to the values returned by
    # evaluate)
    test_loss, test_acc, test_preds, test_labels = evaluate(model, test_loader, criterion)
    print(f"\nFinal Test Performance:")
    print(f"Test Loss: {test_loss:.4f} | Test Acc: {test_acc:.4f}")

    # Plot test prediction distribution
    plot_prediction_distribution(test_preds, test_labels, "Test Set")

    # Plot batch loss progression (one point per training batch, all epochs)
    fig = go.Figure(data=go.Scatter(y=all_batch_losses, mode='lines', name='Batch Loss'))
    fig.update_layout(
        title="Batch Loss Progression",
        xaxis_title="Batch",
        yaxis_title="Loss",
        height=400
    )
    fig.show()

    # Attention visualization
    sample_text = "This movie was absolutely fantastic! The acting was superb and the plot was captivating."
    print(f"\nVisualizing attention for: '{sample_text}'")
    visualize_attention(sample_text, model, vocab)

    # Test predictions on sample texts
    test_samples = [
        "I've never seen such a terrible movie in my life.",
        "The director did an amazing job with this film!",
        "It was okay, but nothing special.",
        "Absolutely brilliant cinematography and outstanding performances!",
        "Boring and predictable storyline with poor acting."
    ]

    print("\nSentiment Predictions:")
    sample_preds = []
    for text in test_samples:
        # Same tokenisation and vocabulary lookup as IMDBDataset.__getitem__,
        # but without truncating to a maximum length.
        tokens = re.findall(r'\b\w+\b', text.lower())
        token_ids = [vocab.get(token, vocab['<unk>']) for token in tokens]
        input_ids = torch.tensor([token_ids], dtype=torch.long).to(device)
        attention_mask = (input_ids != 0).float().to(device)

        model.eval()
        with torch.no_grad():
            output, _ = model(input_ids, attention_mask)

        # Report the probability of the predicted class as the confidence.
        sentiment = "Positive" if output.item() > 0.5 else "Negative"
        confidence = output.item() if output.item() > 0.5 else 1 - output.item()
        sample_preds.append(output.item())
        print(f"Text: {text}")
        print(f"Prediction: {sentiment} (confidence: {confidence:.3f})")
        print()

    # Plot sample predictions
    fig = go.Figure(data=go.Bar(
        x=[f"Sample {i+1}" for i in range(len(sample_preds))],
        y=sample_preds,
        text=[f"{p:.3f}" for p in sample_preds],
        textposition='auto'
    ))
    fig.add_hline(y=0.5, line_dash="dash", line_color="red",
                 annotation_text="Decision Threshold")
    fig.update_layout(
        title="Sample Predictions",
        xaxis_title="Test Samples",
        yaxis_title="Prediction Score",
        height=400
    )
    fig.show()

In [13]:
# Run the experiment only when this file is executed directly (not on import).
if __name__ == "__main__":
    main()

Loading IMDB data...
Downloading IMDB dataset...


Loading positive reviews: 100%|██████████| 12500/12500 [00:00<00:00, 50315.64it/s]
Loading negative reviews: 100%|██████████| 12500/12500 [00:00<00:00, 49893.42it/s]
Building vocabulary: 100%|██████████| 25000/25000 [00:02<00:00, 10481.07it/s]
Loading test positive reviews: 100%|██████████| 12500/12500 [00:00<00:00, 31872.21it/s]
Loading test negative reviews: 100%|██████████| 12500/12500 [00:00<00:00, 32153.82it/s]


Train samples: 20000
Validation samples: 5000
Test samples: 25000


Model parameters: 4,734,465

Starting training...


Training: 100%|██████████| 625/625 [02:13<00:00,  4.67it/s]
Evaluating: 100%|██████████| 157/157 [00:13<00:00, 11.89it/s]



Epoch 1/5 (147.0s)
Train Loss: 0.5118 | Train Acc: 0.7346
Val Loss:   0.4100 | Val Acc:   0.8152


Training: 100%|██████████| 625/625 [02:13<00:00,  4.69it/s]
Evaluating: 100%|██████████| 157/157 [00:13<00:00, 11.93it/s]



Epoch 2/5 (146.5s)
Train Loss: 0.3509 | Train Acc: 0.8475
Val Loss:   0.3576 | Val Acc:   0.8418


Training: 100%|██████████| 625/625 [02:13<00:00,  4.70it/s]
Evaluating: 100%|██████████| 157/157 [00:13<00:00, 11.94it/s]



Epoch 3/5 (146.2s)
Train Loss: 0.2932 | Train Acc: 0.8752
Val Loss:   0.3437 | Val Acc:   0.8482


Training: 100%|██████████| 625/625 [02:12<00:00,  4.71it/s]
Evaluating: 100%|██████████| 157/157 [00:13<00:00, 11.98it/s]



Epoch 4/5 (145.8s)
Train Loss: 0.2364 | Train Acc: 0.9037
Val Loss:   0.3975 | Val Acc:   0.8398


Training: 100%|██████████| 625/625 [02:12<00:00,  4.71it/s]
Evaluating: 100%|██████████| 157/157 [00:13<00:00, 11.98it/s]



Epoch 5/5 (145.8s)
Train Loss: 0.1942 | Train Acc: 0.9242
Val Loss:   0.3630 | Val Acc:   0.8486

Total training time: 731.3s


Evaluating: 100%|██████████| 782/782 [01:05<00:00, 11.91it/s]



Final Test Performance:
Test Loss: 0.3601 | Test Acc: 0.8502



Visualizing attention for: 'This movie was absolutely fantastic! The acting was superb and the plot was captivating.'

Attention Statistics:
Max attention weight: 0.9627
Min attention weight: 0.0000
Mean attention weight: 0.0714



Sentiment Predictions:
Text: I've never seen such a terrible movie in my life.
Prediction: Negative (confidence: 0.913)

Text: The director did an amazing job with this film!
Prediction: Positive (confidence: 0.983)

Text: It was okay, but nothing special.
Prediction: Negative (confidence: 0.987)

Text: Absolutely brilliant cinematography and outstanding performances!
Prediction: Positive (confidence: 0.991)

Text: Boring and predictable storyline with poor acting.
Prediction: Negative (confidence: 0.995)

