In [None]:
from datasets import load_dataset
import numpy as np
from transformers import BertTokenizerFast,BertForQuestionAnswering,TrainingArguments,Trainer, pipeline
# Load SQuAD and keep only the first 3000 training and first 500 validation
# examples so the fine-tuning run below stays short.
df = load_dataset("SQuAD")
train_subset = df["train"].select(range(3000))
val_subset = df["validation"].select(range(500))
print("✅ Dataset loaded successfully")
# Module-level tokenizer: preprocess_function reads it as a global and relies
# on its offset mappings and sequence_ids().
tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")
def _find_answer_token_span(sequence_ids, offsets, start_char, end_char,
                            context_index=1):
    """Map a character span in the context onto token indices of one feature.

    Args:
        sequence_ids: Per-token sequence id (``None`` for special/padding
            tokens, ``0`` for the question, ``1`` for the context).
        offsets: Per-token ``(start, end)`` character offsets.
        start_char: Inclusive character index where the answer begins.
        end_char: Exclusive character index where the answer ends.
        context_index: Sequence id that marks context tokens.

    Returns:
        ``(token_start, token_end)`` when both ends of the answer fall on
        context tokens of this feature, otherwise ``None``.
    """
    token_start = None
    for idx, (seq_id, (start, end)) in enumerate(zip(sequence_ids, offsets)):
        # Question tokens reuse the same character range as the context, so
        # only context tokens may be matched against the answer offsets.
        if seq_id != context_index:
            continue
        if start <= start_char < end:
            token_start = idx
        if start < end_char <= end:
            # The end token was found; the span is usable only if its start
            # is inside this feature as well.
            return None if token_start is None else (token_start, idx)
    return None


def preprocess_function(examples):
    """Tokenize a batch of SQuAD examples and attach answer token labels.

    Each question/context pair is tokenized with the module-level
    ``tokenizer``. Only the context is truncated; long contexts are split into
    several overlapping features (``stride=128``), so the output may contain
    more rows than ``examples``.

    Args:
        examples: Batched mapping with ``"question"``, ``"context"`` and
            ``"answers"`` columns, where each answer is a dict holding the
            ``"text"`` and ``"answer_start"`` lists.

    Returns:
        The tokenizer output extended with ``"start_positions"`` and
        ``"end_positions"``. Features that do not fully contain the answer, or
        whose example has no answer at all, are labelled with the index of the
        ``[CLS]`` token.
    """
    questions = [q.strip() for q in examples["question"]]
    contexts = examples["context"]

    tokenized_examples = tokenizer(
        questions,
        contexts,
        truncation="only_second",
        max_length=384,
        stride=128,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )

    # These two helper columns are only needed to compute labels; pop them so
    # they are not stored as model inputs.
    sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping")
    offset_mapping = tokenized_examples.pop("offset_mapping")

    answers = examples["answers"]
    cls_token_id = tokenizer.cls_token_id

    start_positions = []
    end_positions = []

    for i, offsets in enumerate(offset_mapping):
        cls_index = tokenized_examples["input_ids"][i].index(cls_token_id)
        # Several features can originate from the same example.
        answer = answers[sample_mapping[i]]

        span = None
        # Unanswerable examples carry empty answer lists; indexing [0] on
        # them would raise IndexError, so they fall through to the CLS label.
        if answer["answer_start"] and answer["text"]:
            start_char = answer["answer_start"][0]
            end_char = start_char + len(answer["text"][0])
            span = _find_answer_token_span(
                tokenized_examples.sequence_ids(i),
                offsets,
                start_char,
                end_char,
            )

        if span is None:
            span = (cls_index, cls_index)

        start_positions.append(span[0])
        end_positions.append(span[1])

    tokenized_examples["start_positions"] = start_positions
    tokenized_examples["end_positions"] = end_positions
    return tokenized_examples

# Turn both splits into model-ready features. The original columns are
# dropped because preprocess_function can return more rows than it receives.
tokenized_train = train_subset.map(
    preprocess_function,
    batched=True,
    remove_columns=train_subset.column_names,
    desc="Tokenizing train data",
)
tokenized_valid = val_subset.map(
    preprocess_function,
    batched=True,
    remove_columns=val_subset.column_names,
    desc="Tokenizing validation data",
)
print("✅ Tokenization complete")

# BERT encoder plus a span-prediction head. The head weights (qa_outputs.*)
# are not part of the bert-base-uncased checkpoint and are learned below.
model = BertForQuestionAnswering.from_pretrained("bert-base-uncased")
training_args = TrainingArguments(
    output_dir="./results",
    # Evaluate and checkpoint on the same 500-step cadence.
    eval_strategy="steps",
    eval_steps=500,
    save_steps=500,
    learning_rate=3e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=2,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=100,
    save_total_limit=2,
    # NOTE(review): mixed precision presumably requires a CUDA device — confirm
    # before running this cell on a CPU-only machine.
    fp16=True,
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_valid,
    tokenizer=tokenizer,
)
# NOTE(review): appears intended to release cached accelerator memory before
# training starts — confirm it is still needed.
from accelerate import Accelerator
Accelerator().free_memory()

# Fine-tune, then report the loss on the validation features.
trainer.train()
eval_results = trainer.evaluate()
print("📈 Evaluation Results:", eval_results)

# Score the predicted token spans against the gold spans of the validation
# features. Comparing start_preds with end_preds (as done previously) only
# measures how often the model predicts a one-token span, not whether the
# prediction is correct.
predictions = trainer.predict(tokenized_valid)
start_logits, end_logits = predictions.predictions
# label_ids follows the model's label order: (start_positions, end_positions).
start_labels, end_labels = predictions.label_ids
start_preds = np.argmax(start_logits, axis=1)
end_preds = np.argmax(end_logits, axis=1)

# Token-overlap F1 per feature: 2 * |pred ∩ gold| / (|pred| + |gold|).
# A prediction whose end precedes its start has length 0 and scores 0.
pred_len = np.maximum(end_preds - start_preds + 1, 0)
gold_len = np.maximum(end_labels - start_labels + 1, 0)
overlap = np.maximum(
    np.minimum(end_preds, end_labels) - np.maximum(start_preds, start_labels) + 1,
    0,
)
# Guard the denominator so a degenerate pair of empty spans cannot divide by 0.
f1_per_feature = 2 * overlap / np.maximum(pred_len + gold_len, 1)
f1_like_score = np.mean(f1_per_feature)
print(f"✅ Approximate F1-like score: {f1_like_score:.2f}")

# Persist the fine-tuned weights and the tokenizer files side by side so the
# directory can be loaded as a single checkpoint by the pipeline below.
model.save_pretrained("./fine_tuned_bert_qa")
tokenizer.save_pretrained("./fine_tuned_bert_qa")
print("✅ Fine-tuned model and tokenizer saved to './fine_tuned_bert_qa'")


# Reload the fine-tuned checkpoint from disk through the high-level pipeline.
qa_pipeline = pipeline(
    "question-answering",
    model="./fine_tuned_bert_qa",
    tokenizer="./fine_tuned_bert_qa"
)

print("💬 Interactive Q&A System Ready!")
print("Type 'quit' anytime to stop.\n")

# Simple REPL: read a context and a question, print the extracted answer.
while True:
    # Strip surrounding whitespace so that e.g. "quit " is still recognised.
    context = input("Enter context (or 'quit' to exit): ").strip()
    if context.lower() == "quit":
        break
    if not context:
        # The pipeline raises ValueError on empty input, which would end the
        # session; ask again instead.
        print("Context must not be empty.")
        continue

    question = input("Enter your question: ").strip()
    if question.lower() == "quit":
        break
    if not question:
        print("Question must not be empty.")
        continue

    # Explicit keywords are the documented calling convention.
    result = qa_pipeline(question=question, context=context)

    print(f"\nAnswer: {result['answer']}")
    print("-" * 50)



✅ Dataset loaded successfully


Tokenizing train data:   0%|          | 0/3000 [00:00<?, ? examples/s]

Tokenizing validation data:   0%|          | 0/500 [00:00<?, ? examples/s]

✅ Tokenization complete


Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


🚀 Training started...


Step,Training Loss,Validation Loss
500,1.21,1.62293


✅ Training completed


📈 Evaluation Results: {'eval_loss': 1.6030237674713135, 'eval_runtime': 3.0564, 'eval_samples_per_second': 170.137, 'eval_steps_per_second': 21.267, 'epoch': 2.0}
✅ Approximate F1-like score: 0.25


Device set to use cuda:0


✅ Fine-tuned model and tokenizer saved to './fine_tuned_bert_qa'
💬 Interactive Q&A System Ready!
Type 'quit' anytime to stop.

Enter context (or 'quit' to exit): The Apollo program was the third United States human spaceflight program carried out by NASA, which accomplished landing the first humans on the Moon from 1969 to 1972. It was first conceived during Dwight D. Eisenhower's administration as a three-person spacecraft to follow the one-person Project Mercury.
Enter your question: Who carried out the Apollo program?





Answer: NASA
--------------------------------------------------
