In [22]:
import os
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer, Trainer, TrainingArguments
from datasets import load_dataset
import numpy as np
import torch

In [23]:
# Set random seeds for reproducibility.
# Seeding NumPy alone is not enough: the classification head is randomly
# initialised by PyTorch when the model is loaded, and that happens before the
# Trainer is constructed with `seed=42`, so the torch RNG is seeded here too.
np.random.seed(42)
torch.manual_seed(42)

# Fetch the "sst2" configuration of GLUE, caching downloads in a local folder
print("Loading SST-2 dataset...")
dataset = load_dataset(
    path="glue",
    name="sst2",
    cache_dir="./dataset_cache",
)


Loading SST-2 dataset...


In [24]:
# Tokenizer for the uncased DistilBERT checkpoint
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")


def tokenize_function(examples):
    """Tokenize a batch of examples.

    Reads the ``"sentence"`` field of ``examples`` and returns the tokenizer
    output, truncated and padded to a fixed length of 128 tokens.
    """
    sentences = examples["sentence"]
    return tokenizer(
        sentences,
        truncation=True,
        padding="max_length",
        max_length=128,
    )


# Run the tokenizer over the dataset in batches
encoded_dataset = dataset.map(tokenize_function, batched=True)

In [25]:

# Shape the dataset for the Trainer: drop the columns that are not used for
# training and expose the target under the name "labels".
encoded_dataset = (
    encoded_dataset
    .remove_columns(["sentence", "idx"])
    .rename_column("label", "labels")
)
# Return PyTorch tensors when the dataset is indexed
encoded_dataset.set_format("torch")

In [26]:
# Instantiate DistilBERT with a two-label sequence-classification head
print("Loading DistilBERT model...")
model = DistilBertForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=2,
)

# Settings for the fine-tuning run
training_args = TrainingArguments(
    # Output locations and logging cadence
    output_dir="./sst2_results",
    logging_dir="./sst2_logs",
    logging_steps=100,
    # Optimisation schedule
    learning_rate=2e-5,
    num_train_epochs=3,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    # Evaluate and checkpoint once per epoch; at the end, reload the best
    # checkpoint as ranked by the "accuracy" metric
    eval_strategy="epoch",
    save_strategy="epoch",
    metric_for_best_model="accuracy",
    load_best_model_at_end=True,
    seed=42,
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading DistilBERT model...


In [27]:
# Accuracy metric handed to the Trainer
def compute_metrics(eval_pred):
    """Compute classification accuracy for one evaluation pass.

    Args:
        eval_pred: A pair ``(logits, labels)``. The predicted class is the
            index of the largest logit along the last axis.

    Returns:
        A dict with a single key, ``"accuracy"``: the fraction of predictions
        equal to their label.
    """
    scores, targets = eval_pred
    is_correct = np.argmax(scores, axis=-1) == targets
    return {"accuracy": is_correct.mean()}

# Bundle the model, run settings, data splits and metric into a Trainer
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    eval_dataset=encoded_dataset["validation"],
    train_dataset=encoded_dataset["train"],
)

# Run fine-tuning; the call is left as the final expression so the notebook
# displays the value it returns
print("Training on SST-2...")
trainer.train()

Training on SST-2...


Epoch,Training Loss,Validation Loss,Accuracy
1,0.1641,0.263846,0.90711
2,0.1063,0.330943,0.899083
3,0.0903,0.339414,0.909404


TrainOutput(global_step=6315, training_loss=0.14050425907400801, metrics={'train_runtime': 2936.0455, 'train_samples_per_second': 68.816, 'train_steps_per_second': 2.151, 'total_flos': 6691160124062208.0, 'train_loss': 0.14050425907400801, 'epoch': 3.0})

In [28]:

# Score the model on the evaluation dataset the Trainer was constructed with
print("Evaluating on SST-2...")
eval_results = trainer.evaluate()
# Format once so the console and the results file cannot disagree
accuracy_line = f"SST-2 Accuracy: {eval_results['eval_accuracy']:.4f}"
print(accuracy_line)

# Save results. The directory comes from the Trainer's own arguments so it
# cannot drift from `output_dir`, and it is created if missing so this cell
# does not rely on a checkpoint having been written there first.
results_dir = trainer.args.output_dir
os.makedirs(results_dir, exist_ok=True)
with open(os.path.join(results_dir, "eval_results.txt"), "w", encoding="utf-8") as f:
    f.write(accuracy_line + "\n")

print(f"Done! Results saved in {results_dir}/")

Evaluating on SST-2...


SST-2 Accuracy: 0.9094
Done! Results saved in ./sst2_results/
