In [1]:
import numpy as np
# Report the NumPy release this kernel is running against
print(f"{np.__version__}")

1.26.4


In [2]:
import torch
from transformers import BertTokenizer, BertForQuestionAnswering
from transformers import Trainer, TrainingArguments
from datasets import load_dataset, Dataset

# The tokenizer and the question-answering model are loaded from the same
# pre-trained checkpoint so their vocabularies match.
PRETRAINED_CHECKPOINT = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(PRETRAINED_CHECKPOINT)
model = BertForQuestionAnswering.from_pretrained(PRETRAINED_CHECKPOINT)

# Toy Stack Overflow-style Q&A examples, stored column-wise (one list per field;
# entry i of every list belongs to example i).
qa_data = dict(
    question=[
        "What is a neural network?",
        "How does backpropagation work?",
    ],
    context=[
        "A neural network is a computational model that is inspired by the way biological neural networks in the human brain process information.",
        "Backpropagation is the process of training a neural network by adjusting the weights based on the error rate.",
    ],
    answer=[
        "A neural network is a computational model...",
        "Backpropagation is the process...",
    ],
    start_positions=[0, 0],    # where each answer starts in its context
    end_positions=[13, 12],    # where each answer ends in its context
)

# Wrap the column dictionary in a Hugging Face Dataset
dataset = Dataset.from_dict(qa_data)

# Tokenize the data (BERT requires tokenized inputs)
def preprocess_data(examples):
    """Tokenize a batch of question/context pairs and attach the span labels.

    Written to be used as the callback of ``dataset.map(..., batched=True)``,
    so every value in ``examples`` is a list with one entry per example.

    Args:
        examples: Batch mapping providing the keys ``question``, ``context``,
            ``start_positions`` and ``end_positions``.

    Returns:
        The tokenizer output for the (question, context) pairs, truncated and
        padded to 512 tokens, with ``start_positions`` and ``end_positions``
        copied over from ``examples``.
    """
    # No ``return_tensors`` here: this runs inside ``Dataset.map``, which stores
    # the result as plain columns, so building torch tensors first is wasted
    # work and mixes tensor columns with the list-valued label columns below.
    tokenized_examples = tokenizer(
        examples['question'],
        examples['context'],
        truncation=True,
        padding="max_length",
        max_length=512,
    )

    # Adding the labels (start and end positions of answers).
    # NOTE(review): the values are passed through unchanged. The dataset
    # describes them as positions "in the context", but the model input is the
    # combined question + context sequence, so index 0 would be the leading
    # special token rather than the first context token -- confirm the intended
    # unit (character / word / token) and convert to token indices of the
    # combined sequence if necessary.
    tokenized_examples["start_positions"] = examples["start_positions"]
    tokenized_examples["end_positions"] = examples["end_positions"]

    return tokenized_examples

# Apply tokenization to the dataset
tokenized_dataset = dataset.map(preprocess_data, batched=True)

# Define training arguments for the Trainer API.
# Evaluation is deliberately left at its default (disabled): no eval_dataset is
# passed to the Trainer below, so requesting step-wise evaluation could never
# succeed. Re-enable it together with an eval_dataset once a validation split
# exists.
training_args = TrainingArguments(
    output_dir='./results',          # Output directory
    per_device_train_batch_size=4,   # Batch size per GPU/CPU
    per_device_eval_batch_size=4,    # Evaluation batch size (used only if evaluation is enabled)
    num_train_epochs=3,              # Number of epochs
    logging_dir='./logs',            # Log directory
    logging_steps=10,                # Log every 10 steps
)

# Initialize the Trainer
trainer = Trainer(
    model=model,                       # The BERT model
    args=training_args,                # Training arguments
    train_dataset=tokenized_dataset,   # The training dataset
)

# Train the model
trainer.train()

# Save the fine-tuned model and its tokenizer side by side so the directory
# can be reloaded with from_pretrained()
model.save_pretrained('./bert-qa-model')
tokenizer.save_pretrained('./bert-qa-model')

# Inference function
def answer_question(question, context):
    """Extract the answer to ``question`` from ``context`` with the module-level model.

    The question/context pair is encoded with the module-level ``tokenizer``,
    run through the module-level ``model``, and the tokens between the
    highest-scoring start and end positions are decoded back to text.

    Side effect: switches ``model`` to evaluation mode.

    Args:
        question: The question text.
        context: The passage the answer is extracted from.

    Returns:
        str: The decoded answer span. Empty when the predicted end position
        lies before the predicted start position.
    """
    # Truncate to the same 512-token limit used during preprocessing so an
    # over-long context cannot exceed the model's maximum input length.
    inputs = tokenizer(
        question,
        context,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )
    # The model may have been moved to an accelerator during training; the
    # inputs must live on the same device as its weights.
    inputs = inputs.to(model.device)
    input_ids = inputs["input_ids"][0].tolist()

    # Inference only: disable dropout and skip gradient bookkeeping.
    model.eval()
    with torch.no_grad():
        outputs = model(**inputs)

    # Plain ints make the slice below independent of tensor device/dtype.
    answer_start = int(torch.argmax(outputs.start_logits))    # Start position of the answer
    answer_end = int(torch.argmax(outputs.end_logits)) + 1    # End position (exclusive)

    answer_tokens = tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end])
    return tokenizer.convert_tokens_to_string(answer_tokens)

# Demo: ask the model one question about a short passage and print the exchange
question = "What is a neural network?"
context = "A neural network is a computational model that is inspired by the way biological neural networks in the human brain process information."
answer = answer_question(question, context)
print(f"Q: {question}\nA: {answer}")




RuntimeError: Failed to import transformers.trainer because of the following error (look up to see its traceback):
Failed to import transformers.integrations.integration_utils because of the following error (look up to see its traceback):
Failed to import transformers.modeling_tf_utils because of the following error (look up to see its traceback):
Your currently installed version of Keras is Keras 3, but this is not yet supported in Transformers. Please install the backwards-compatible tf-keras package with `pip install tf-keras`.