**Cell 1: Install/Upgrade Libraries**

In [None]:
!pip install --upgrade transformers datasets accelerate evaluate

**Cell 2: Imports and GPU Check**

In [None]:
import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer
)
import evaluate

# Pick the compute device once, up front: CUDA when PyTorch can see a GPU,
# otherwise fall back to the CPU. Later cells move the model and inputs here.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

**Cell 3: Load Full Dataset and Create Splits**

In [None]:
# Fetch the IMDb sentiment dataset through the Hugging Face `datasets` library.
dataset = load_dataset("imdb")

# Pull out the two splits used by the rest of the notebook.
train_dataset, test_dataset = dataset["train"], dataset["test"]

# Report how many examples each split contains.
print(f"Full training dataset size: {len(train_dataset)}")
print(f"Full test dataset size: {len(test_dataset)}")

**Cell 4: Preprocessing with the New Tokenizer**

In [None]:
# CHANGE 1: switch to the "roberta-base" checkpoint.
# `model_name` is the single source of truth for the checkpoint: it is used here to
# load the matching tokenizer and again later when the model itself is loaded, so
# the tokenizer and the model always agree.
model_name = "roberta-base"

# AutoTokenizer resolves the tokenizer class that belongs to the named checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize_function(examples):
    """Tokenize the "text" field of a batch of dataset examples.

    Args:
        examples: A batch from the dataset; only its "text" entry is read.

    Returns:
        The tokenizer's output for the batch. Every sequence is padded to the
        tokenizer's maximum length (padding="max_length") and longer sequences
        are cut down to that length (truncation=True).
    """
    texts = examples["text"]
    encoded = tokenizer(texts, padding="max_length", truncation=True)
    return encoded

# Tokenize both splits up front.
# `map` runs `tokenize_function` over the dataset; with batched=True the function
# receives many examples per call instead of one, which lets the tokenizer work on
# whole batches.
tokenized_train_dataset = train_dataset.map(
    tokenize_function,
    batched=True,
)
tokenized_test_dataset = test_dataset.map(
    tokenize_function,
    batched=True,
)

**Cell 5: Load the New Pre-trained Model**

In [None]:
# Instantiate the classifier from the same checkpoint the tokenizer came from.
# AutoModelForSequenceClassification picks the architecture from `model_name` and
# sets it up for sequence classification; num_labels=2 requests a two-class output
# for the binary sentiment task.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
)

# Place the model on the device chosen earlier (GPU when available, else CPU).
# Left as the final expression of the cell, so the notebook also displays the model.
model.to(device)

**Cell 6: Define Improved Training Arguments**

In [None]:
# CHANGE 2 & 3: Increasing epochs and tuning the learning rate
# Hyperparameters and bookkeeping options consumed by the Trainer.
training_args = TrainingArguments(
    output_dir="./results_roberta",  # Directory where checkpoints and outputs are written.
    num_train_epochs=4,              # <-- CHANGED: train for 4 epochs.
    learning_rate=2e-5,              # <-- ADDED: a common learning rate for fine-tuning transformers.
    per_device_train_batch_size=16,  # Batch size per GPU/CPU for training.
    per_device_eval_batch_size=16,   # Batch size per GPU/CPU for evaluation.
    weight_decay=0.01,               # Weight decay as regularization against overfitting.
    logging_steps=500,               # Log training progress every 500 steps.
    eval_strategy="epoch",           # <-- CHANGED: evaluate at the end of each epoch.
    save_strategy="epoch",           # Save a checkpoint at the end of each epoch (must match eval_strategy
                                     # for load_best_model_at_end to work).
    load_best_model_at_end=True,     # After training, reload the best checkpoint seen during training.
    # Without this, "best" means lowest eval loss. The notebook reports accuracy as its
    # headline number, so select the checkpoint on the "accuracy" value returned by
    # compute_metrics instead (higher is better is inferred for non-loss metrics).
    metric_for_best_model="accuracy",

    # We could use Learning Rate Scheduler
    # lr_scheduler_type="cosine", # Use the cosine learning rate scheduler
    # warmup_steps=500,           # We still want the initial warmup
)

**Cell 7: Define Metrics and Train**

In [None]:
# Accuracy metric from the `evaluate` library; `compute_metrics` below uses it.
metric = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Turn raw evaluation output into the accuracy metric.

    Passed to the Trainer, which calls it with the outputs of an evaluation pass.

    Args:
        eval_pred: A pair that unpacks into (logits, labels): the model's
            per-class scores and the reference labels.

    Returns:
        The result of the accuracy metric computed over the predicted classes.
    """
    scores, references = eval_pred
    # The predicted class is the position of the largest score along the last axis.
    predicted_classes = scores.argmax(axis=-1)
    return metric.compute(predictions=predicted_classes, references=references)

# Assemble the Trainer, which runs the training and evaluation loops.
# NOTE(review): the test split is passed as `eval_dataset`, and the training
# arguments enable load_best_model_at_end, so the test split is also what decides
# which checkpoint is kept. Metrics later reported on that same split are therefore
# optimistic; consider carving a validation split out of the training data.
trainer = Trainer(
    model=model,                            # Model to fine-tune.
    args=training_args,                     # Hyperparameters defined in the previous cell.
    compute_metrics=compute_metrics,        # Converts evaluation outputs into accuracy.
    train_dataset=tokenized_train_dataset,  # Tokenized training split.
    eval_dataset=tokenized_test_dataset,    # Tokenized split used for every evaluation pass.
)

# Kick off fine-tuning; the surrounding prints mark the start and end of the run.
print("Starting improved training run...")
trainer.train()
print("Training finished!")

**Cell 8: Final Evaluation**

In [None]:
print("Evaluating the final model on the test set...")
# With no arguments, evaluate() scores the model currently held by the Trainer on
# the evaluation dataset the Trainer was constructed with (the tokenized test split).
# Since load_best_model_at_end=True, that model is the best checkpoint from training.
evaluation_results = trainer.evaluate()

# Report the two headline numbers; metric keys carry the "eval_" prefix.
print("\n--- Final Evaluation Results ---")
print(f"Accuracy: {evaluation_results['eval_accuracy']:.4f}")
print(f"Loss: {evaluation_results['eval_loss']:.4f}")

**Cell 9: Use the Improved Model for Prediction**

In [None]:
# Import the softmax function from PyTorch for converting logits to probabilities.
from torch.nn.functional import softmax

# Example movie reviews used as a quick sanity check of the fine-tuned model.
reviews = [
    "This movie was absolutely fantastic! The acting was brilliant and the plot was engaging.",
    "It was a complete waste of time. The plot was predictable and the characters were boring."
]
# Display names indexed by predicted class id
# (assumes the dataset encodes 0 = negative, 1 = positive -- TODO confirm).
labels = ["Negative", "Positive"]

# Switch the model to inference mode explicitly so that dropout is disabled and the
# predictions are deterministic, regardless of which cells ran before this one.
model.eval()

# Classify each review on its own.
for review in reviews:
    # Tokenize the review.
    # - return_tensors="pt": return PyTorch tensors.
    # - padding=True: pad to the longest sequence in this call (a no-op for a single review).
    # - truncation=True: cut the sequence if it exceeds the tokenizer's maximum length.
    # - .to(device): move the input tensors to the same device as the model.
    inputs = tokenizer(review, return_tensors="pt", padding=True, truncation=True).to(device)

    # No gradients are needed for inference; skipping them saves memory and time.
    with torch.no_grad():
        # Forward pass: the outputs carry one logit per class.
        outputs = model(**inputs)
        # Convert logits into a probability distribution over the classes.
        probabilities = softmax(outputs.logits, dim=1)
        # The prediction is the class with the highest probability.
        prediction_index = torch.argmax(probabilities, dim=1).item()

    # Print the review, the predicted label and the per-class confidence.
    print("\n--------------------")
    print(f"Review: '{review}'")
    print(f"Prediction: {labels[prediction_index]}")
    print(f"Confidence: Negative={probabilities[0][0]:.4f}, Positive={probabilities[0][1]:.4f}")