In [1]:
from datasets import load_dataset
from transformers import AutoTokenizer

model_name = "bert-base-uncased"  # You can switch to "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load dataset
dataset = load_dataset("glue", "sst2")

# Tokenize
def tokenize(batch):
    return tokenizer(batch["sentence"], truncation=True, padding="max_length")

tokenized = dataset.map(tokenize, batched=True)
tokenized = tokenized.rename_column("label", "labels")
tokenized.set_format("torch", columns=["input_ids", "attention_mask", "labels"])


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
from transformers import TrainingArguments, Trainer
from sklearn.metrics import accuracy_score

import transformers
print(transformers.__version__)


def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    acc = accuracy_score(labels, preds)
    return {"accuracy": acc}

training_args = TrainingArguments(
    output_dir="./models",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=2,
    logging_dir="./logs",
    logging_steps=50,
    save_steps=500,
    disable_tqdm=False
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["validation"],
    compute_metrics=compute_metrics,
)


4.51.3


ImportError: Using the `Trainer` with `PyTorch` requires `accelerate>=0.26.0`: Please run `pip install transformers[torch]` or `pip install 'accelerate>=0.26.0'`

In [None]:
trainer.train()
trainer.save_model("./models/bert-sst2")


In [None]:
eval_result = trainer.evaluate(tokenized["validation"])
print("Evaluation Results:", eval_result)

# Save to file
with open("results/eval_metrics.txt", "w") as f:
    f.write(str(eval_result))


In [None]:
from transformers import pipeline

zero_model = pipeline("text-classification", model="bert-base-uncased")

sample = "The movie was absolutely terrible and a waste of time."
print(f"Zero-shot result for: '{sample}'")
print(zero_model(sample))

In [11]:
import accelerate
print(accelerate.__version__)


ModuleNotFoundError: No module named 'accelerate'