In [None]:
# Mount Google Drive so files under /content/drive are visible to this runtime.
from google.colab import drive
drive.mount('/content/drive')
import os
# Work from the assignment folder: relative paths used later in the notebook
# (e.g. "./bert-qa", "./logs") resolve inside it.
os.chdir("/content/drive/My Drive/A2_NLP")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install torch torchvision datasets evaluate transformers accelerate -U



In [None]:
import pandas as pd
from datasets import Dataset, load_dataset
from transformers import BertTokenizerFast, BertForQuestionAnswering, Trainer, TrainingArguments
import torch

# Load the Pretrained BERT Model and Tokenizer

In [None]:
# Tokenizer and model must come from the same checkpoint, so name it once.
MODEL_CHECKPOINT = "bert-base-uncased"

# Fast tokenizer: preprocessing below relies on its offset mappings.
tokenizer = BertTokenizerFast.from_pretrained(MODEL_CHECKPOINT)

# BERT with a question-answering head on top of the pretrained encoder.
model = BertForQuestionAnswering.from_pretrained(MODEL_CHECKPOINT)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Fetch SQuAD through the `datasets` library.
dataset = load_dataset("squad")

# Pull out the two partitions the dataset already provides.
train_data, val_data = dataset["train"], dataset["validation"]

In [None]:
# Sanity check: (number of rows, number of columns) of the training split.
train_data.shape

(87599, 5)

In [None]:
# Inspect one raw example to see the fields consumed by preprocessing
# ("question", "context", "answers").
train_data[0]

{'id': '5733be284776f41900661182',
 'title': 'University_of_Notre_Dame',
 'context': 'Architecturally, the school has a Catholic character. Atop the Main Building\'s gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.',
 'question': 'To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?',
 'answers': {'text': ['Saint Bernadette Soubirous'], 'answer_start': [515]}}

# Tokenize the Data

In [None]:
def preprocess_function(examples, max_length=384, qa_tokenizer=None):
    """Tokenize a batch of SQuAD examples and attach answer-span token labels.

    Each (question, context) pair is encoded as a single sequence padded to
    ``max_length``. Only the context is ever truncated, so the question is
    always kept intact. Exactly one feature is produced per example (no
    sliding window): when the answer is cut off by truncation, or the example
    has no answer, both labels are set to 0 (the first token, ``[CLS]`` for
    BERT).

    Args:
        examples: Batch dict with the keys ``"question"``, ``"context"`` and
            ``"answers"``; each answers entry has ``"text"`` and
            ``"answer_start"`` lists, of which only the first item is used.
        max_length: Total sequence length after padding/truncation.
        qa_tokenizer: Tokenizer to call; defaults to the notebook-level
            ``tokenizer``. Its result must expose ``sequence_ids(i)`` and an
            ``"offset_mapping"`` entry (as fast tokenizers do).

    Returns:
        The tokenizer output extended with ``"start_positions"`` and
        ``"end_positions"`` (one token index per example).
    """
    # Resolve lazily so the global is only needed when nothing is passed in.
    tok = tokenizer if qa_tokenizer is None else qa_tokenizer

    questions = [q.strip() for q in examples["question"]]
    contexts = examples["context"]
    answers = examples["answers"]

    # No return_tensors here: Dataset.map serialises the result anyway, and
    # plain Python lists make the offset scan below much cheaper.
    tokenized_examples = tok(
        questions,
        contexts,
        truncation="only_second",  # never truncate the question
        padding="max_length",
        max_length=max_length,
        return_offsets_mapping=True,
    )

    start_positions = []
    end_positions = []

    for i, ans in enumerate(answers):
        offsets = tokenized_examples["offset_mapping"][i]
        sequence_ids = tokenized_examples.sequence_ids(i)

        # Offsets of question tokens are relative to the question string and
        # special/padding tokens are (0, 0); only context tokens (sequence
        # id 1) may be compared against the answer's character span.
        context_tokens = [k for k, seq_id in enumerate(sequence_ids) if seq_id == 1]

        if not context_tokens or len(ans["answer_start"]) == 0:
            start_positions.append(0)
            end_positions.append(0)
            continue

        start_char = int(ans["answer_start"][0])
        end_char = start_char + len(ans["text"][0])  # exclusive end
        ctx_first, ctx_last = context_tokens[0], context_tokens[-1]

        # Answer not fully inside the (possibly truncated) context window.
        if offsets[ctx_first][0] > start_char or offsets[ctx_last][1] < end_char:
            start_positions.append(0)
            end_positions.append(0)
            continue

        # Last context token that starts at or before the answer start.
        start_token = ctx_first
        while start_token < ctx_last and offsets[start_token + 1][0] <= start_char:
            start_token += 1

        # First context token that ends at or after the answer end.
        end_token = ctx_last
        while end_token > ctx_first and offsets[end_token - 1][1] >= end_char:
            end_token -= 1

        # Degenerate zero-width answers can cross over; keep the span ordered.
        if end_token < start_token:
            end_token = start_token

        start_positions.append(start_token)
        end_positions.append(end_token)

    # Add labels to tokenized data
    tokenized_examples["start_positions"] = start_positions
    tokenized_examples["end_positions"] = end_positions

    return tokenized_examples

In [None]:
# Apply preprocessing to the dataset.
# batched=True hands preprocess_function a dict of column lists (a whole batch)
# instead of one example at a time, which is what the function expects.
train_dataset = train_data.map(preprocess_function, batched=True)
val_dataset = val_data.map(preprocess_function, batched=True)

Map:   0%|          | 0/10570 [00:00<?, ? examples/s]

# Fine-Tune BERT for the Question Answering Task Using the SQuAD Dataset

In [None]:
training_args = TrainingArguments(
    # --- Output and logging ---
    output_dir="./bert-qa",            # checkpoints are written here
    logging_dir="./logs",
    logging_steps=50,                  # log training loss every 50 optimizer steps
    report_to="none",                  # disable external trackers (e.g. Weights & Biases)

    # --- Evaluation and checkpointing (strategies are kept identical) ---
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,       # reload the best checkpoint once training ends

    # --- Optimisation schedule ---
    num_train_epochs=3,
    learning_rate=3e-5,
    weight_decay=0.01,
    warmup_steps=100,                  # learning-rate warm-up over the first 100 steps

    # --- Batching and precision ---
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    gradient_accumulation_steps=2,     # 32 x 2 = 64 examples per optimizer step per device
    fp16=True,                         # mixed-precision training; requires a GPU
)

In [None]:
from transformers import EarlyStoppingCallback

# Build the Trainer around the model, arguments and tokenized splits.
# `processing_class` replaces the deprecated `tokenizer` argument, which emits
# a FutureWarning on recent transformers releases; the tokenizer is still
# saved alongside the model checkpoints.
# Early stopping with patience=1 halts training as soon as one evaluation
# fails to improve on the best one so far (presumably measured on eval loss,
# since no metric_for_best_model is configured — confirm if this changes).
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    processing_class=tokenizer,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=1)],
)

  trainer = Trainer(


In [None]:
# Run fine-tuning; evaluation and checkpointing happen once per epoch
# as configured in training_args.
trainer.train()

Epoch,Training Loss,Validation Loss
1,1.3509,1.268489
2,1.0633,1.173776
3,0.8131,1.206328


TrainOutput(global_step=4107, training_loss=1.2418599039323435, metrics={'train_runtime': 920.0782, 'train_samples_per_second': 285.625, 'train_steps_per_second': 4.464, 'total_flos': 5.150100283496294e+16, 'train_loss': 1.2418599039323435, 'epoch': 3.0})

In [None]:
# Persist the trainer's current model to a directory relative to the working
# directory set at the top of the notebook.
trainer.save_model("bert-qa-finetuned-A100")  # Save new fine-tuned model

## Evaluate the Fine-tuned SQuAD Model

1. Check Validation Loss
- Measures loss on validation data
- This helps to check if the model is still improving or overfitting.

In [None]:
# Evaluate on the eval_dataset given to the Trainer; the returned dict holds
# the validation loss plus runtime statistics.
# NOTE(review): `results` is reassigned later by the EM/F1 cell — consider a
# distinct name if both values are needed.
results = trainer.evaluate()
print("Evaluation Results:", results)

Evaluation Results: {'eval_loss': 1.1737759113311768, 'eval_runtime': 15.1721, 'eval_samples_per_second': 696.672, 'eval_steps_per_second': 21.816, 'epoch': 3.0}


2. Run SQuAD Evaluation for EM & F1 Score

- Measures how well the model predicts answers (accuracy-based)
- Gives real-world performance insight on Exact Match (EM) & F1 Score.

In [None]:
import torch

def answer_question(question, context, max_answer_length=30):
    """Extract the most likely answer span for ``question`` from ``context``.

    The start and end of the span are chosen jointly: among all pairs with
    ``0 <= end - start <= max_answer_length`` whose tokens both lie inside the
    context, the pair with the highest ``start_logit + end_logit`` wins.
    Picking the two arg-maxima independently (as before) often produced an
    inverted or over-long pair, or pointed at question/special tokens, and the
    function then gave up even though a valid span existed.

    The answer is sliced out of the original ``context`` string using the
    tokenizer's character offsets, so casing and spacing are preserved rather
    than reconstructed from word-pieces.

    Args:
        question: Question text.
        context: Passage to search; it is truncated (never the question) when
            the pair exceeds 512 tokens.
        max_answer_length: Maximum allowed ``end - start`` in tokens.

    Returns:
        The answer substring of ``context``, or the string
        ``"No valid answer found."`` when no admissible span exists
        (e.g. an empty context).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Detect GPU or CPU
    model.to(device)
    model.eval()  # make sure dropout is disabled regardless of earlier training

    # Tokenize inputs; offsets map every token back to characters of `context`.
    encoding = tokenizer(
        question,
        context,
        return_tensors="pt",
        truncation="only_second",
        max_length=512,
        return_offsets_mapping=True,
    )
    # The model does not accept offset_mapping, so take it out before the call.
    offsets = encoding.pop("offset_mapping")[0].tolist()
    sequence_ids = encoding.sequence_ids(0)
    inputs = {key: val.to(device) for key, val in encoding.items()}  # Move to correct device

    # Run inference
    with torch.no_grad():
        outputs = model(**inputs)

    start_logits = outputs.start_logits[0].float()
    end_logits = outputs.end_logits[0].float()
    num_tokens = start_logits.shape[0]

    # Only context tokens (sequence id 1) may start or end an answer.
    in_context = torch.tensor(
        [seq_id == 1 for seq_id in sequence_ids],
        dtype=torch.bool,
        device=start_logits.device,
    )
    if not bool(in_context.any()):
        return "No valid answer found."

    neg_inf = float("-inf")
    start_logits = start_logits.masked_fill(~in_context, neg_inf)
    end_logits = end_logits.masked_fill(~in_context, neg_inf)

    # span_scores[i, j] is the score of the span starting at i and ending at j.
    span_scores = start_logits[:, None] + end_logits[None, :]
    positions = torch.arange(num_tokens, device=start_logits.device)
    span_length = positions[None, :] - positions[:, None]  # end - start
    admissible = (span_length >= 0) & (span_length <= max_answer_length)
    span_scores = span_scores.masked_fill(~admissible, neg_inf)

    # argmax over the flattened (row-major) matrix -> recover (start, end).
    start_idx, end_idx = divmod(int(torch.argmax(span_scores)), num_tokens)
    if bool(torch.isinf(span_scores[start_idx, end_idx])):
        return "No valid answer found."

    # Slice the original text instead of decoding word-pieces.
    char_start = offsets[start_idx][0]
    char_end = offsets[end_idx][1]
    answer = context[char_start:char_end]

    return answer

In [None]:
import evaluate

# Official SQuAD scorer (Exact Match and token-level F1).
metric = evaluate.load("squad")

val_df = val_data.to_pandas()

# One model prediction per validation row, in row order.
val_df["predicted_answer"] = [
    answer_question(question, context)
    for question, context in zip(val_df["question"], val_df["context"])
]

# Build predictions and references in a single pass so their ids always agree.
formatted_predictions = []
formatted_references = []
for row_idx, row in val_df.iterrows():
    example_id = str(row_idx)
    gold = row["answers"]

    formatted_predictions.append(
        {"id": example_id, "prediction_text": row["predicted_answer"].strip().lower()}
    )
    formatted_references.append(
        {
            "id": example_id,
            "answers": {
                "text": [text.lower() for text in gold["text"]],
                "answer_start": list(gold["answer_start"]),
            },
        }
    )

# Score the predictions against the references.
results = metric.compute(predictions=formatted_predictions, references=formatted_references)

# Report both metrics as percentages.
print(f"Exact Match (EM): {results['exact_match']:.2f}%")
print(f"F1 Score: {results['f1']:.2f}%")

Exact Match (EM): 62.02%
F1 Score: 71.99%


In [None]:
# Eyeball the first few predictions next to their reference answers.
val_df.head()

Unnamed: 0,id,title,context,question,answers,predicted_answer
0,56be4db0acb8001400a502ec,Super_Bowl_50,Super Bowl 50 was an American football game to...,Which NFL team represented the AFC at Super Bo...,"{'text': ['Denver Broncos', 'Denver Broncos', ...",denver broncos
1,56be4db0acb8001400a502ed,Super_Bowl_50,Super Bowl 50 was an American football game to...,Which NFL team represented the NFC at Super Bo...,"{'text': ['Carolina Panthers', 'Carolina Panth...",carolina panthers
2,56be4db0acb8001400a502ee,Super_Bowl_50,Super Bowl 50 was an American football game to...,Where did Super Bowl 50 take place?,"{'text': ['Santa Clara, California', 'Levi's S...","santa clara, california"
3,56be4db0acb8001400a502ef,Super_Bowl_50,Super Bowl 50 was an American football game to...,Which NFL team won Super Bowl 50?,"{'text': ['Denver Broncos', 'Denver Broncos', ...",denver broncos
4,56be4db0acb8001400a502f0,Super_Bowl_50,Super Bowl 50 was an American football game to...,What color was used to emphasize the 50th anni...,"{'text': ['gold', 'gold', 'gold'], 'answer_sta...","golden anniversary "" with various gold"


## Save the Fine-tuned BERT Model (fine-tuned using SQuAD)

In [None]:
# NOTE(review): the same trainer was already saved earlier under
# "bert-qa-finetuned-A100"; this writes a second copy under a different name.
trainer.save_model("bert-qa-finetuned")