In [None]:
!pip install transformers torch datasets
!pip install transformers==4.21.0
!pip install accelerate==0.21.0

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer
import torch
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments

# Name of the pretrained checkpoint whose tokenizer preprocesses the text
checkpoint = 'bert-base-uncased'

# IMDb dataset, fetched by name
dataset = load_dataset('imdb')

# Tokenizer that belongs to the checkpoint above
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Batch tokenization helper used with `dataset.map`
def tokenize_function(examples):
    """Tokenize the ``text`` field of a batch of examples.

    Relies on the module-level ``tokenizer``. Sequences are padded with
    ``padding='max_length'`` and over-long ones are truncated, so every
    encoded example has the same length.

    Args:
        examples: Mapping with a ``'text'`` entry holding the raw strings.

    Returns:
        Whatever ``tokenizer`` returns for that batch of texts.
    """
    texts = examples['text']
    encoded = tokenizer(texts, truncation=True, padding='max_length')
    return encoded

# Apply the tokenizer to every split, one batch of examples at a time
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Rename columns to match the model's expected input
tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
tokenized_datasets.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

# Shuffle the datasets to ensure a representative subset
SHUFFLE_SEED = 42  # fixed seed so the same subset is selected on every run
tokenized_datasets['train'] = tokenized_datasets['train'].shuffle(seed=SHUFFLE_SEED)
tokenized_datasets['test'] = tokenized_datasets['test'].shuffle(seed=SHUFFLE_SEED)

# Keep the first half of each shuffled split.
# The sizes used to be the full split lengths, which made `select` return every
# row and contradicted both the "half" intent and the `small_*` names.
train_size = len(tokenized_datasets['train']) // 2
test_size = len(tokenized_datasets['test']) // 2

small_train_dataset = tokenized_datasets['train'].select(range(train_size))
small_test_dataset = tokenized_datasets['test'].select(range(test_size))

In [None]:
# Pretrained BERT checkpoint with a two-label sequence-classification head
model = AutoModelForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=2,
)

# Configuration for the training run
training_args = TrainingArguments(
    # --- output locations and logging ---
    output_dir='./results',
    logging_dir='./logs',
    logging_steps=10,                 # log every 10 steps
    # --- schedule ---
    num_train_epochs=3,
    warmup_steps=500,
    evaluation_strategy="epoch",      # evaluate once per epoch
    # --- batching ---
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # --- regularization ---
    weight_decay=0.01,
)

# Bundle the model, its configuration, and the train/eval subsets
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_test_dataset,
)

# Run fine-tuning
trainer.train()