In [1]:
!pip -q install torch transformers datasets evaluate accelerate peft numpy


In [9]:
pip install -U transformers accelerate datasets evaluate


Note: you may need to restart the kernel to use updated packages.


In [7]:
import os, collections
import numpy as np
import torch

from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForQuestionAnswering,
    TrainingArguments,
    Trainer,
    default_data_collator,
)
import evaluate
from peft import LoraConfig, get_peft_model, TaskType


  from .autonotebook import tqdm as notebook_tqdm


In [8]:
# Checkpoint name handed to AutoTokenizer / AutoModelForQuestionAnswering.
MODEL_NAME = "bert-base-uncased"
# Used twice: as the dataset id for load_dataset() and as the metric id for evaluate.load().
DATASET_NAME = "squad_v2"     # start with this
# Trainer output_dir and the directory the final model/tokenizer are saved to.
OUTPUT_DIR = "./artifacts/model"  # change to Drive path if you want

# Tokenization: MAX_LENGTH is the tokenizer's max_length (features are padded to it),
# DOC_STRIDE is passed as the tokenizer's `stride` for overflowing contexts.
MAX_LENGTH = 384
DOC_STRIDE = 128
# Per-device batch size, used for both training and evaluation.
BATCH_SIZE = 8
# Number of training epochs and the optimizer learning rate given to TrainingArguments.
EPOCHS = 2
LR = 3e-5

# USE_LORA wraps the model with a PEFT LoRA adapter; MERGE_LORA_BEFORE_SAVE
# additionally calls merge_and_unload() on it before the final save.
USE_LORA = True
MERGE_LORA_BEFORE_SAVE = True

# Create the output directory up front so later cells can write into it.
os.makedirs(OUTPUT_DIR, exist_ok=True)


In [9]:
def prepare_train_features(examples, tokenizer, max_length=None, doc_stride=None):
    """Tokenize a batch of QA examples into padded training features with span labels.

    Each (question, context) pair is tokenized with overflow enabled, so a long
    context yields several overlapping features. Every feature is labelled with
    the token indices of the first listed answer. Features whose window does not
    fully contain that answer, and examples with no answer at all, are labelled
    with the index of the [CLS] token for both start and end.

    Args:
        examples: Batch dict with "question", "context" and "answers" columns,
            where each answers entry has "text" and "answer_start" lists.
        tokenizer: Tokenizer whose output supports offset mappings,
            ``sequence_ids(i)`` and ``overflow_to_sample_mapping``.
        max_length: Maximum tokens per feature. Defaults to the notebook-level
            ``MAX_LENGTH`` when ``None``.
        doc_stride: Token overlap between consecutive windows. Defaults to the
            notebook-level ``DOC_STRIDE`` when ``None``.

    Returns:
        The tokenizer output with ``offset_mapping`` and
        ``overflow_to_sample_mapping`` removed and ``start_positions`` /
        ``end_positions`` added (one entry per feature).
    """
    if max_length is None:
        max_length = MAX_LENGTH
    if doc_stride is None:
        doc_stride = DOC_STRIDE

    questions = [q.lstrip() for q in examples["question"]]

    tokenized = tokenizer(
        questions,
        examples["context"],
        truncation="only_second",
        max_length=max_length,
        stride=doc_stride,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )

    # Maps every feature back to the example it was cut from.
    overflow_to_sample_mapping = tokenized.pop("overflow_to_sample_mapping")
    offset_mapping = tokenized.pop("offset_mapping")

    start_positions = []
    end_positions = []

    for i, offsets in enumerate(offset_mapping):
        input_ids = tokenized["input_ids"][i]
        cls_index = input_ids.index(tokenizer.cls_token_id)
        answers = examples["answers"][overflow_to_sample_mapping[i]]

        # No-answer case (SQuAD v2): point both labels at [CLS].
        if len(answers["answer_start"]) == 0:
            start_positions.append(cls_index)
            end_positions.append(cls_index)
            continue

        start_char = answers["answer_start"][0]
        end_char = start_char + len(answers["text"][0])

        # First and last token index that belong to the context (sequence id 1).
        sequence_ids = tokenized.sequence_ids(i)
        context_start = 0
        while sequence_ids[context_start] != 1:
            context_start += 1
        context_end = len(input_ids) - 1
        while sequence_ids[context_end] != 1:
            context_end -= 1

        # Answer not fully inside this window -> label the feature with [CLS].
        if offsets[context_start][0] > start_char or offsets[context_end][1] < end_char:
            start_positions.append(cls_index)
            end_positions.append(cls_index)
            continue

        # Walk forward to the last context token starting at or before the
        # answer. The scan is bounded by context_end: the tokens after the
        # context carry offset (0, 0) in the usual fast-tokenizer output, which
        # would otherwise satisfy the comparison and push the label onto the
        # trailing [SEP]/[PAD] positions when the answer starts at the last
        # context token.
        token_start_index = context_start
        while token_start_index <= context_end and offsets[token_start_index][0] <= start_char:
            token_start_index += 1
        start_positions.append(token_start_index - 1)

        # Walk backward to the first context token ending at or after the
        # answer end, symmetrically bounded by context_start.
        token_end_index = context_end
        while token_end_index >= context_start and offsets[token_end_index][1] >= end_char:
            token_end_index -= 1
        end_positions.append(token_end_index + 1)

    tokenized["start_positions"] = start_positions
    tokenized["end_positions"] = end_positions
    return tokenized


def prepare_validation_features(examples, tokenizer, max_length=None, doc_stride=None):
    """Tokenize a batch of QA examples into padded evaluation features.

    Uses the same windowing as the training features but attaches no labels.
    Instead each feature keeps the id of the example it came from and an
    offset mapping in which every non-context position is replaced by ``None``,
    so post-processing can tell which tokens may be part of an answer.

    Args:
        examples: Batch dict with "id", "question" and "context" columns.
        tokenizer: Tokenizer whose output supports offset mappings,
            ``sequence_ids(i)`` and ``overflow_to_sample_mapping``.
        max_length: Maximum tokens per feature. Defaults to the notebook-level
            ``MAX_LENGTH`` when ``None``.
        doc_stride: Token overlap between consecutive windows. Defaults to the
            notebook-level ``DOC_STRIDE`` when ``None``.

    Returns:
        The tokenizer output with ``overflow_to_sample_mapping`` removed, an
        ``example_id`` list added, and ``offset_mapping`` masked as described.
    """
    if max_length is None:
        max_length = MAX_LENGTH
    if doc_stride is None:
        doc_stride = DOC_STRIDE

    questions = [q.lstrip() for q in examples["question"]]

    tokenized = tokenizer(
        questions,
        examples["context"],
        truncation="only_second",
        max_length=max_length,
        stride=doc_stride,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )

    # Maps every feature back to the example it was cut from.
    sample_mapping = tokenized.pop("overflow_to_sample_mapping")

    example_ids = []
    masked_offsets = []
    for i, offsets in enumerate(tokenized["offset_mapping"]):
        sequence_ids = tokenized.sequence_ids(i)
        example_ids.append(examples["id"][sample_mapping[i]])
        # Keep offsets only for context tokens (sequence id 1); question and
        # special/padding tokens become None.
        masked_offsets.append(
            [offset if seq_id == 1 else None for seq_id, offset in zip(sequence_ids, offsets)]
        )

    tokenized["example_id"] = example_ids
    tokenized["offset_mapping"] = masked_offsets
    return tokenized


In [10]:
def postprocess_qa_predictions(
    examples,
    features,
    raw_predictions,
    n_best_size=20,
    max_answer_length=30,
    allow_no_answer=True,
    null_score_diff_threshold=0.0,
):
    """Turn per-feature start/end logits into one answer string per example.

    For every example, the top ``n_best_size`` start and end positions of each
    of its features are combined; the highest-scoring valid span (inside the
    context, end >= start, at most ``max_answer_length`` tokens) is the
    candidate answer.

    With ``allow_no_answer`` the candidate is additionally compared against the
    "null" score, i.e. the start+end logit at position 0 of each feature
    (assumes [CLS] is the first token of every feature, which is the position
    the training labels use for unanswerable questions with BERT-style
    tokenizers). The smallest null score over the example's features is used;
    if it beats the best span by more than ``null_score_diff_threshold`` the
    prediction is the empty string. Without this comparison an empty answer is
    only produced when no valid span exists at all.

    Args:
        examples: Dataset-like object supporting column access
            (``examples["id"]``) and row iteration (rows with "id", "context").
        features: Sequence of rows with "example_id" and "offset_mapping"
            (``None`` at non-context positions).
        raw_predictions: ``(all_start_logits, all_end_logits)``, indexable by
            feature index.
        n_best_size: How many top start and top end indices to combine.
        max_answer_length: Maximum answer length in tokens.
        allow_no_answer: Enable the null-score comparison (SQuAD v2 style).
            Pass ``False`` for datasets in which every question is answerable.
        null_score_diff_threshold: Margin by which the null score must exceed
            the best span score to predict "no answer".

    Returns:
        ``collections.OrderedDict`` mapping example id -> predicted answer text.
    """
    all_start_logits, all_end_logits = raw_predictions

    example_id_to_index = {k: i for i, k in enumerate(examples["id"])}
    features_per_example = collections.defaultdict(list)
    for i, f in enumerate(features):
        features_per_example[example_id_to_index[f["example_id"]]].append(i)

    predictions = collections.OrderedDict()

    for example_index, example in enumerate(examples):
        context = example["context"]

        best_text = ""
        best_score = None
        min_null_score = None

        for fi in features_per_example[example_index]:
            start_logits = all_start_logits[fi]
            end_logits = all_end_logits[fi]
            offsets = features[fi]["offset_mapping"]

            # Track the smallest null score over all windows of this example.
            null_score = start_logits[0] + end_logits[0]
            if min_null_score is None or null_score < min_null_score:
                min_null_score = null_score

            start_idxs = np.argsort(start_logits)[-n_best_size:][::-1].tolist()
            end_idxs = np.argsort(end_logits)[-n_best_size:][::-1].tolist()

            for s in start_idxs:
                # Skip positions outside the feature or outside the context.
                if s >= len(offsets) or offsets[s] is None:
                    continue
                for e in end_idxs:
                    if e >= len(offsets) or offsets[e] is None:
                        continue
                    if e < s or (e - s + 1) > max_answer_length:
                        continue

                    score = start_logits[s] + end_logits[e]
                    if best_score is None or score > best_score:
                        best_score = score
                        best_text = context[offsets[s][0]:offsets[e][1]]

        if allow_no_answer and min_null_score is not None:
            if best_score is None or (min_null_score - best_score) > null_score_diff_threshold:
                best_text = ""

        predictions[example["id"]] = best_text

    return predictions


# The metric implementation is looked up under the same name as the dataset.
metric = evaluate.load(DATASET_NAME)


def compute_metrics(p, eval_examples, eval_features):
    """Score raw model predictions against the reference answers.

    Args:
        p: Object exposing ``predictions`` (the raw start/end logits).
        eval_examples: Original evaluation examples (rows with "id", "answers").
        eval_features: Tokenized evaluation features matching ``p.predictions``.

    Returns:
        Dict with float "exact_match" and "f1" entries; either is 0.0 when the
        metric result lacks the corresponding "exact" / "f1" key.
    """
    answers_by_id = postprocess_qa_predictions(eval_examples, eval_features, p.predictions)

    formatted_predictions = []
    for example_id, answer_text in answers_by_id.items():
        formatted_predictions.append(
            {"id": example_id, "prediction_text": answer_text, "no_answer_probability": 0.0}
        )

    references = []
    for example in eval_examples:
        references.append({"id": example["id"], "answers": example["answers"]})

    scores = metric.compute(predictions=formatted_predictions, references=references)
    exact = float(scores.get("exact", 0.0))
    f1 = float(scores.get("f1", 0.0))
    return {"exact_match": exact, "f1": f1}


Using the latest cached version of the module from C:\Users\deepthi r g\.cache\huggingface\modules\evaluate_modules\metrics\evaluate-metric--squad_v2\bd2753381689b3f5bd1f4d85d23b9e2764cf7a26ca1821bcc729f1ee660d1560 (last modified on Thu Jan 22 15:48:04 2026) since it couldn't be found locally at evaluate-metric--squad_v2, or remotely on the Hugging Face Hub.


In [11]:
# NOTE(review): this notebook defines postprocess_qa_predictions in two
# consecutive cells; this later definition is the one that takes effect.
def postprocess_qa_predictions(
    examples,
    features,
    raw_predictions,
    n_best_size=20,
    max_answer_length=30,
    allow_no_answer=True,
    null_score_diff_threshold=0.0,
):
    """Turn per-feature start/end logits into one answer string per example.

    For every example, the top ``n_best_size`` start and end positions of each
    of its features are combined; the highest-scoring valid span (inside the
    context, end >= start, at most ``max_answer_length`` tokens) is the
    candidate answer.

    With ``allow_no_answer`` the candidate is additionally compared against the
    "null" score, i.e. the start+end logit at position 0 of each feature
    (assumes [CLS] is the first token of every feature, which is the position
    the training labels use for unanswerable questions with BERT-style
    tokenizers). The smallest null score over the example's features is used;
    if it beats the best span by more than ``null_score_diff_threshold`` the
    prediction is the empty string. Without this comparison an empty answer is
    only produced when no valid span exists at all.

    Args:
        examples: Dataset-like object supporting column access
            (``examples["id"]``) and row iteration (rows with "id", "context").
        features: Sequence of rows with "example_id" and "offset_mapping"
            (``None`` at non-context positions).
        raw_predictions: ``(all_start_logits, all_end_logits)``, indexable by
            feature index.
        n_best_size: How many top start and top end indices to combine.
        max_answer_length: Maximum answer length in tokens.
        allow_no_answer: Enable the null-score comparison (SQuAD v2 style).
            Pass ``False`` for datasets in which every question is answerable.
        null_score_diff_threshold: Margin by which the null score must exceed
            the best span score to predict "no answer".

    Returns:
        ``collections.OrderedDict`` mapping example id -> predicted answer text.
    """
    all_start_logits, all_end_logits = raw_predictions

    example_id_to_index = {k: i for i, k in enumerate(examples["id"])}
    features_per_example = collections.defaultdict(list)
    for i, f in enumerate(features):
        features_per_example[example_id_to_index[f["example_id"]]].append(i)

    predictions = collections.OrderedDict()

    for example_index, example in enumerate(examples):
        context = example["context"]

        best_text = ""
        best_score = None
        min_null_score = None

        for fi in features_per_example[example_index]:
            start_logits = all_start_logits[fi]
            end_logits = all_end_logits[fi]
            offsets = features[fi]["offset_mapping"]

            # Track the smallest null score over all windows of this example.
            null_score = start_logits[0] + end_logits[0]
            if min_null_score is None or null_score < min_null_score:
                min_null_score = null_score

            start_idxs = np.argsort(start_logits)[-n_best_size:][::-1].tolist()
            end_idxs = np.argsort(end_logits)[-n_best_size:][::-1].tolist()

            for s in start_idxs:
                # Skip positions outside the feature or outside the context.
                if s >= len(offsets) or offsets[s] is None:
                    continue
                for e in end_idxs:
                    if e >= len(offsets) or offsets[e] is None:
                        continue
                    if e < s or (e - s + 1) > max_answer_length:
                        continue

                    score = start_logits[s] + end_logits[e]
                    if best_score is None or score > best_score:
                        best_score = score
                        best_text = context[offsets[s][0]:offsets[e][1]]

        if allow_no_answer and min_null_score is not None:
            if best_score is None or (min_null_score - best_score) > null_score_diff_threshold:
                best_text = ""

        predictions[example["id"]] = best_text

    return predictions


# The metric implementation is looked up under the same name as the dataset.
metric = evaluate.load(DATASET_NAME)


def compute_metrics(p, eval_examples, eval_features):
    """Score raw model predictions against the reference answers.

    Args:
        p: Object exposing ``predictions`` (the raw start/end logits).
        eval_examples: Original evaluation examples (rows with "id", "answers").
        eval_features: Tokenized evaluation features matching ``p.predictions``.

    Returns:
        Dict with float "exact_match" and "f1" entries; either is 0.0 when the
        metric result lacks the corresponding "exact" / "f1" key.
    """
    answers_by_id = postprocess_qa_predictions(eval_examples, eval_features, p.predictions)

    formatted_predictions = []
    for example_id, answer_text in answers_by_id.items():
        formatted_predictions.append(
            {"id": example_id, "prediction_text": answer_text, "no_answer_probability": 0.0}
        )

    references = []
    for example in eval_examples:
        references.append({"id": example["id"], "answers": example["answers"]})

    scores = metric.compute(predictions=formatted_predictions, references=references)
    exact = float(scores.get("exact", 0.0))
    f1 = float(scores.get("f1", 0.0))
    return {"exact_match": exact, "f1": f1}


Using the latest cached version of the module from C:\Users\deepthi r g\.cache\huggingface\modules\evaluate_modules\metrics\evaluate-metric--squad_v2\bd2753381689b3f5bd1f4d85d23b9e2764cf7a26ca1821bcc729f1ee660d1560 (last modified on Thu Jan 22 15:48:04 2026) since it couldn't be found locally at evaluate-metric--squad_v2, or remotely on the Hugging Face Hub.


In [12]:
# Seed the RNGs before any weights are created. The question-answering head
# added on top of the pretrained encoder and the LoRA adapter matrices are
# randomly initialised in this cell, i.e. before the Trainer exists, so
# without an explicit seed two fresh runs start from different weights.
SEED = 42  # same value as TrainingArguments' default `seed`
torch.manual_seed(SEED)
np.random.seed(SEED)

dataset = load_dataset(DATASET_NAME)

# A fast tokenizer is required: feature preparation relies on offset mappings.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
model = AutoModelForQuestionAnswering.from_pretrained(MODEL_NAME)

if USE_LORA:
    # No target_modules are given, so PEFT's defaults for this architecture apply.
    lora_config = LoraConfig(
        task_type=TaskType.QUESTION_ANS,
        r=16,
        lora_alpha=32,
        lora_dropout=0.05,
        bias="none",
        inference_mode=False,
    )
    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()


'(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /datasets/squad_v2/resolve/main/README.md (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate in certificate chain (_ssl.c:1017)')))"), '(Request ID: e7696351-63dc-4b8d-8a2e-b45144506f0f)')' thrown while requesting HEAD https://huggingface.co/datasets/squad_v2/resolve/main/README.md
Retrying in 1s [Retry 1/5].
'(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /datasets/squad_v2/resolve/main/README.md (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate in certificate chain (_ssl.c:1017)')))"), '(Request ID: 2a6dc8bf-ebec-4a35-b33a-3b120c83d294)')' thrown while requesting HEAD https://huggingface.co/datasets/squad_v2/resolve/main/README.md
Retrying in 2s [Retry 2/5].
'(MaxRetry

trainable params: 591,362 || all params: 109,484,548 || trainable%: 0.5401


In [None]:
# Tokenize both splits. The raw columns are removed because one example can
# expand into several overlapping features, so row counts no longer match.
train_features = dataset["train"].map(
    lambda batch: prepare_train_features(batch, tokenizer),
    batched=True,
    remove_columns=dataset["train"].column_names,
)

eval_features = dataset["validation"].map(
    lambda batch: prepare_validation_features(batch, tokenizer),
    batched=True,
    remove_columns=dataset["validation"].column_names,
)

# No evaluation strategy is configured, so nothing is evaluated during
# training (the former `eval_steps` setting had no effect and was dropped).
# Evaluation is run explicitly once training has finished, see below.
args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    do_train=True,
    do_eval=True,
    learning_rate=LR,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=EPOCHS,
    weight_decay=0.01,
    logging_steps=50,
    report_to="none",
    fp16=torch.cuda.is_available(),
    save_steps=500,
    save_total_limit=2,
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_features,
    eval_dataset=eval_features,
    data_collator=default_data_collator,
    # `processing_class` replaces the deprecated `tokenizer` argument.
    processing_class=tokenizer,
)

trainer.train()

# The evaluation features carry no start/end labels, and Trainer only invokes a
# `compute_metrics` callback when labels are present. Exact-match / F1 are
# therefore computed here from the raw logits instead of through the callback.
eval_output = trainer.predict(eval_features)
eval_metrics = compute_metrics(eval_output, dataset["validation"], eval_features)
eval_metrics


  trainer = Trainer(
  super().__init__(loader)


Step,Training Loss
50,5.9314
100,5.7041
150,5.2325
200,4.6691
250,4.1685
300,3.883
350,3.7055
400,3.5326


In [None]:
# Fold the LoRA weights into the base model before saving when both flags are
# set; otherwise the model is saved exactly as it is.
should_merge = USE_LORA and MERGE_LORA_BEFORE_SAVE
final_model = model.merge_and_unload() if should_merge else model

# Model first, then tokenizer, both into the same directory.
for artifact in (final_model, tokenizer):
    artifact.save_pretrained(OUTPUT_DIR)

print("Saved to:", OUTPUT_DIR)
print("Files:", os.listdir(OUTPUT_DIR))
