In [None]:
pip install transformers datasets evaluate torch

In [None]:
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, Trainer, TrainingArguments, TrainerCallback
from datasets import load_dataset
from sklearn.metrics import f1_score
import evaluate
import pandas as pd

# Load the SQuAD v2 dataset; the returned object is indexed by split name
# ("train" / "validation") further down in this script.
dataset = load_dataset("rajpurkar/squad_v2")

# Optional smoke-test setup (disabled): keep only the first 100 rows of each
# split so a full run finishes quickly. Uncomment to use.
# train_dataset = dataset["train"].select(range(100))  # First 100 rows of the training set
# valid_dataset = dataset["validation"].select(range(100))  # First 100 rows of the validation set

# Load the model and tokenizer from the same checkpoint name so that the
# tokenizer used for preprocessing matches the model being trained.
model_name = "bert-base-uncased"
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Preprocessing data
def preprocess_function(examples):
    """Tokenize a batch of question/context pairs and label the answer span.

    Args:
        examples: A batch with "question", "context" and "answers" columns.
            Each entry of "answers" provides a "text" list and an
            "answer_start" list (character offsets into the context).

    Returns:
        The tokenizer output with "start_positions" and "end_positions"
        added and "offset_mapping" removed. Examples without an answer, and
        examples whose answer is not fully contained in the (possibly
        truncated) context tokens, are labelled (0, 0).
    """
    questions = examples["question"]
    contexts = examples["context"]
    answers = examples["answers"]

    # padding="max_length" pads every example up to max_length tokens.
    inputs = tokenizer(
        questions,
        contexts,
        max_length=256,
        truncation=True,
        padding="max_length",
        return_offsets_mapping=True,
    )

    start_positions, end_positions = [], []
    for i, offsets in enumerate(inputs["offset_mapping"]):
        # Default label: index 0, used for "no answer" (SQuAD v2) and for
        # answers that cannot be located in the tokenized context.
        start_token, end_token = 0, 0

        if len(answers[i]["text"]) > 0:
            answer = answers[i]["text"][0]
            start_char = answers[i]["answer_start"][0]
            end_char = start_char + len(answer)

            # Offsets of question tokens are relative to the question string,
            # so only tokens of the second sequence (the context, id 1) may
            # be compared against the answer's character span.
            sequence_ids = inputs.sequence_ids(i)
            found_start, found_end = None, None
            for j, (token_begin, token_stop) in enumerate(offsets):
                if sequence_ids[j] != 1:
                    continue
                if token_begin <= start_char < token_stop:
                    found_start = j
                if token_begin < end_char <= token_stop:
                    found_end = j

            # Keep the span only when both ends were found; a half-located
            # answer (e.g. cut off by truncation) would be an invalid label.
            if found_start is not None and found_end is not None:
                start_token, end_token = found_start, found_end

        start_positions.append(start_token)
        end_positions.append(end_token)

    inputs["start_positions"] = start_positions
    inputs["end_positions"] = end_positions
    inputs.pop("offset_mapping")  # Not needed for training
    return inputs

# Tokenizing dataset
# Disabled alternative that maps separately created train/validation subsets:
# tokenized_train = train_dataset.map(preprocess_function, batched=True)
# tokenized_valid = valid_dataset.map(preprocess_function, batched=True)
# Run preprocess_function over the dataset in batches. The original columns
# (taken from the "train" split) are removed, so only the fields returned by
# preprocess_function remain.
tokenized_datasets = dataset.map(preprocess_function, batched=True, remove_columns=dataset["train"].column_names)

# Load evaluation metrics using the evaluate library
# NOTE(review): these three metric objects are not referenced anywhere else in
# the visible code (compute_metrics uses sklearn's f1_score and manual
# comparisons instead) - confirm whether they are still needed.
accuracy_metric = evaluate.load("accuracy")
f1_metric = evaluate.load("f1")
exact_match_metric = evaluate.load("exact_match")

# Function to compute evaluation metrics
def compute_metrics(eval_pred):
    """Score predicted answer boundaries against the labelled positions.

    Args:
        eval_pred: A pair ``(logits, labels)`` where ``logits`` unpacks into
            ``(start_logits, end_logits)`` and ``labels`` unpacks into
            ``(start_positions, end_positions)``.

    Returns:
        A dict with three entries:
            "accuracy": fraction of examples where both the start and the
                end prediction equal their labels.
            "f1": mean of the macro-averaged F1 of the start predictions and
                of the end predictions (positions treated as classes).
            "exact_match": mean of the start-position hit rate and the
                end-position hit rate.
    """
    (start_logits, end_logits), (start_positions, end_positions) = eval_pred

    # The highest-scoring index along the last axis is the predicted position.
    predicted_starts = start_logits.argmax(-1)
    predicted_ends = end_logits.argmax(-1)

    # Macro F1 per boundary.
    start_f1 = f1_score(start_positions, predicted_starts, average='macro')
    end_f1 = f1_score(end_positions, predicted_ends, average='macro')

    # Element-wise hit masks, shared by the exact-match and accuracy figures.
    start_hits = predicted_starts == start_positions
    end_hits = predicted_ends == end_positions

    start_hit_rate = start_hits.sum() / len(predicted_starts)
    end_hit_rate = end_hits.sum() / len(predicted_ends)

    # An example counts towards accuracy only when both boundaries are right.
    both_hits = start_hits & end_hits
    span_accuracy = both_hits.sum() / len(predicted_starts)

    return {
        "accuracy": span_accuracy,
        "f1": (start_f1 + end_f1) / 2,
        "exact_match": (start_hit_rate + end_hit_rate) / 2,
    }


# Custom callback to print metrics after each epoch
class PrintMetricsCallback(TrainerCallback):
    """Print a one-row metrics table per epoch and dump the log history to CSV.

    The callback relies only on the ``state`` and keyword arguments passed to
    each hook, so it does not depend on a module-level ``trainer`` variable.

    Training metrics and evaluation metrics are stored in separate
    ``log_history`` entries, so each column is filled from the most recent
    entry that contains the corresponding key; "N/A" is shown when a key has
    never been logged.

    NOTE(review): the Trainer appears to run the epoch-level evaluation after
    ``on_epoch_end`` fires, so the evaluation columns may show the previous
    evaluation's values - confirm, and consider reporting from
    ``on_evaluate`` if same-epoch numbers are required.
    """

    @staticmethod
    def _latest(log_history, key):
        """Return the newest logged value for ``key``, or "N/A" if absent."""
        for entry in reversed(log_history):
            if key in entry:
                return entry[key]
        return "N/A"

    def on_epoch_end(self, args, state, control, **kwargs):
        """Print the latest metrics and write the full history to CSV."""
        history = state.log_history
        if not history:
            return

        epoch = state.epoch if state.epoch is not None else "N/A"
        train_loss = self._latest(history, "loss")
        val_loss = self._latest(history, "eval_loss")
        accuracy = self._latest(history, "eval_accuracy")
        f1_value = self._latest(history, "eval_f1")
        exact_match = self._latest(history, "eval_exact_match")

        # The optimizer is read from the hook's keyword arguments; fall back
        # to "N/A" if it is not supplied.
        optimizer = kwargs.get("optimizer")
        if optimizer is not None:
            learning_rate = optimizer.param_groups[0]["lr"]
        else:
            learning_rate = "N/A"

        # Print table
        table = pd.DataFrame(
            [[epoch, train_loss, val_loss, accuracy, f1_value, exact_match, learning_rate]],
            columns=["Epoch", "Train Loss", "Val Loss", "Accuracy", "F1 Score", "Exact Match", "Learning Rate"]
        )
        print("\n" + table.to_string(index=False) + "\n")

        # Save log history to CSV
        pd.DataFrame(history).to_csv("training_results.csv", index=False)

    def on_train_end(self, args, state, control, **kwargs):
        """Write the complete log history once training has finished."""
        # Ensure the last epoch is saved to CSV
        pd.DataFrame(state.log_history).to_csv("training_results_end.csv", index=False)

# Training configuration
# NOTE(review): output_dir mentions "distilbert" although model_name above is
# "bert-base-uncased" and the final model is saved to "./bert-squad" - confirm
# the intended directory name.
# NOTE(review): some transformers releases rename `evaluation_strategy` to
# `eval_strategy` - confirm against the installed version.
training_args = TrainingArguments(
    output_dir="./distilbert-squad",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    num_train_epochs=5,
    weight_decay=0.01,
    save_total_limit=2,
    logging_dir="./logs",
    logging_steps=100,
    save_steps=500,
    warmup_steps=500,
    fp16=True,  # Mixed precision training (if GPU supports it)
)

# Trainer setup: train on the tokenized "train" split, evaluate on the
# tokenized "validation" split, and score evaluations with compute_metrics.
# NOTE(review): some transformers releases deprecate the `tokenizer` argument
# in favour of `processing_class` - confirm against the installed version.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# Register the callback that prints per-epoch metrics and writes CSV logs.
trainer.add_callback(PrintMetricsCallback())

# Start training
trainer.train()

# Save trained model
trainer.save_model("./bert-squad")
