In [None]:
import tensorflow as tf
from transformers import BertForMaskedLM
from transformers import BertTokenizer, TFBertForSequenceClassification
from transformers import TextDataset, DataCollatorForLanguageModeling
from transformers import Trainer, TrainingArguments

# Fine-tune BERT on the Shakespeare corpus with the masked-language-modeling
# (MLM) objective, then save the result to ./bert_finetuned.
#
# `Trainer` only drives PyTorch models, and `DataCollatorForLanguageModeling`
# produces MLM labels, so the model must be `BertForMaskedLM` (not a TF
# sequence-classification model) and the dataset must be a plain map-style
# collection of examples (not a `tf.data.Dataset`).

# Load the Shakespeare dataset
file_path = "shakespeare.txt"
try:
    with open(file_path, "r", encoding="utf-8") as file:
        text = file.read()
except FileNotFoundError as err:
    # Same exception type as before, but with an actionable message.
    raise FileNotFoundError(
        f"Training corpus not found at {file_path!r}. Put the file there "
        "(or point file_path at it) before running this script."
    ) from err

# Tokenize the text using BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize line by line so no single call exceeds the model's maximum length,
# and skip special tokens here: they are added once per training block below.
lines = [line for line in text.splitlines() if line.strip()]
if not lines:
    raise ValueError(f"Training corpus {file_path!r} contains no text.")
encoded_text = tokenizer(lines, add_special_tokens=False)
token_ids = [
    token_id for line_ids in encoded_text["input_ids"] for token_id in line_ids
]

# Split the full token stream into fixed-size blocks instead of truncating the
# corpus to a single sequence. Two positions per block are reserved for the
# [CLS] and [SEP] tokens; the final block may be shorter and is padded by the
# data collator at batch time.
block_size = 128
content_size = block_size - 2
train_dataset = [
    {
        "input_ids": [tokenizer.cls_token_id]
        + token_ids[start:start + content_size]
        + [tokenizer.sep_token_id]
    }
    for start in range(0, len(token_ids), content_size)
]
if not train_dataset:
    raise ValueError(f"Training corpus {file_path!r} produced no tokens.")

# Fine-tuning arguments
training_args = TrainingArguments(
    output_dir="./bert_finetuned",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    save_steps=10_000,
    save_total_limit=2,
)

# Load pre-trained BERT model with a masked-LM head, matching the collator
bert_model = BertForMaskedLM.from_pretrained('bert-base-uncased')

# Create a Trainer. The collator randomly masks tokens and builds the
# corresponding `labels`, so the dataset only needs to supply `input_ids`.
trainer = Trainer(
    model=bert_model,
    args=training_args,
    data_collator=DataCollatorForLanguageModeling(tokenizer=tokenizer),
    train_dataset=train_dataset,
)

# Fine-tune the model
trainer.train()

# Save the fine-tuned model, plus the tokenizer so the output directory can be
# reloaded on its own with `from_pretrained`.
bert_model.save_pretrained("./bert_finetuned")
tokenizer.save_pretrained("./bert_finetuned")


FileNotFoundError: ignored