In [None]:
# 1Ô∏è‚É£ Install Libraries (Same as before for consistency)
!pip install accelerate==0.34.2 transformers==4.44.2 datasets==2.20.0 torch==2.3.1 -U

# 2. Imports and Configuration
from transformers import (
    AutoModelForSequenceClassification, # NEW: For Classification Tasks
    AutoTokenizer, 
    Trainer, 
    TrainingArguments,
    DataCollatorWithPadding # NEW: For efficient batch padding
)
from datasets import load_dataset
import numpy as np
import evaluate

# Checkpoint to fine-tune: a small, fast model suited to classification.
MODEL_NAME = "distilbert-base-uncased"

print("‚è≥ Loading Model and Tokenizer...")
# SST-2 is a binary task (positive/negative), so the classification head is
# created with two output labels.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=2,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# No pad-token workaround (as used with GPT-2) is needed here: DistilBERT
# already defines a dedicated [PAD] token.
print("‚úÖ Model loaded.")

In [None]:
# 3. Load the SST-2 classification dataset (the "sst2" config of GLUE).
print("‚è≥ Loading SST-2 dataset...")
dataset = load_dataset(path="glue", name="sst2")

# 4. Data tokenization
def tokenize_function(examples):
    """Tokenize the ``sentence`` field of a batch of examples.

    Truncation is enabled; padding is not applied here. Per the original
    author's note, BERT-style tokenizers add [CLS] at the start and [SEP]
    at the end of each sequence.
    """
    sentences = examples["sentence"]
    return tokenizer(sentences, truncation=True)

# Tokenize the dataset in batches, then drop the raw text and index columns
# so only the tokenized inputs and the label remain for training.
tokenized_datasets = (
    dataset
    .map(tokenize_function, batched=True)
    .remove_columns(["sentence", "idx"])
)

# SST-2 labels are already encoded as 0 (negative) / 1 (positive); no remapping needed.
print("üìù Dataset pre-processing complete.")

# 5. Data collator: pads each batch to the length of its longest sequence.
data_collator = DataCollatorWithPadding(tokenizer)

# 6. Evaluation metric: accuracy is the measure used for this classification task.
accuracy_metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Compute accuracy from the (predictions, labels) pair supplied by the Trainer.

    The predictions are raw logits, so the arg-max along axis 1 is taken to
    obtain the predicted class index before scoring against the labels.
    """
    logits, references = eval_pred
    predicted_classes = np.argmax(logits, axis=1)
    return accuracy_metric.compute(
        predictions=predicted_classes,
        references=references,
    )

print("‚úÖ Evaluation metrics defined.")

In [None]:
# Work on a small slice of each split so the demonstration runs quickly.
# For full training, drop the .select() calls and use the whole splits.
train_split = tokenized_datasets["train"]
validation_split = tokenized_datasets["validation"]
train_subset = train_split.select(range(5000))
eval_subset = validation_split.select(range(500))


# 7. Define training arguments
OUTPUT_DIR = "./distilbert_sst2_results"

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=3,              # Standard number of epochs for fine-tuning
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=2e-5,              # Standard learning rate for fine-tuning
    logging_steps=100,
    evaluation_strategy="epoch",     # Evaluate after each epoch
    save_strategy="epoch",           # Checkpoint after each epoch (must match the eval strategy)
    # The correct flag is `load_best_model_at_end`; the previously used
    # `load_best_model_as_init_model` is not a TrainingArguments parameter and
    # made the constructor raise TypeError. With no `metric_for_best_model`
    # given, the best checkpoint is chosen by evaluation loss.
    load_best_model_at_end=True,
    report_to="none",                # Disable external experiment trackers
)

# 8. Build the Trainer and run fine-tuning.
print("üöÄ Initializing Trainer and starting fine-tuning...")
trainer = Trainer(
    model=model,
    args=training_args,
    # Data: the reduced train/eval slices plus per-batch padding.
    train_dataset=train_subset,
    eval_dataset=eval_subset,
    data_collator=data_collator,
    tokenizer=tokenizer,
    # Custom metric function used at evaluation time.
    compute_metrics=compute_metrics,
)

trainer.train()

print("‚úÖ Fine-tuning complete!")

# 9. Test inference
# `torch` is imported here because the device selection below calls
# torch.cuda.is_available(); without this import the cell raises NameError.
import torch
from transformers import pipeline

# Build a classification pipeline around the fine-tuned model.
# device=0 selects the first GPU when CUDA is available, -1 falls back to CPU.
classifier = pipeline(
    "sentiment-analysis",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,
)

test_sentences = [
    "This is an absolutely delightful film, highly recommended.",
    "The plot was confusing and the characters were flat.",
]

print("\nüìù Testing Fine-Tuned Classifier:")
results = classifier(test_sentences)

# The pipeline reports labels as 'LABEL_0' (negative) or 'LABEL_1' (positive);
# map the numeric suffix to a readable name.
label_map = {0: "Negative", 1: "Positive"}

for sentence, result in zip(test_sentences, results):
    # Take the integer after the last underscore, e.g. 'LABEL_1' -> 1.
    label_id = int(result['label'].split('_')[-1])
    sentiment = label_map[label_id]
    score = result['score'] * 100  # probability expressed as a percentage

    print(f"\n- Text: '{sentence}'")
    print(f"  Result: {sentiment} ({score:.2f}% confidence)")