In [2]:
!pip install transformers datasets accelerate -U peft evaluate

Collecting transformers
  Downloading transformers-4.34.0-py3-none-any.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m54.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets
  Downloading datasets-2.14.5-py3-none-any.whl (519 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.6/519.6 kB[0m [31m54.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.23.0-py3-none-any.whl (258 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m258.1/258.1 kB[0m [31m33.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting peft
  Downloading peft-0.5.0-py3-none-any.whl (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.6/85.6 kB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting evaluate
  Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.4/81.4 kB[0m [31m11.1 MB/s[0m eta [36m0:00:

In [None]:
from transformers import BertTokenizerFast, BertForQuestionAnswering, TrainingArguments, Trainer
from datasets import load_dataset
from peft import PeftModelForQuestionAnswering, PromptEncoderConfig, PromptEncoder
from collections import Counter
import re
import string

# Load the SQuAD v2 dataset
dataset = load_dataset('squad_v2')
train_dataset = dataset['train']
eval_dataset = dataset['validation']

# Load the fast tokenizer
tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")

# Tokenization function
def tokenize_function(examples):
    encodings = tokenizer(
        examples['question'],
        examples['context'],
        truncation=True,
        padding='max_length',
        max_length=492,
        return_offsets_mapping=True
    )

    start_positions = []
    end_positions = []

    for i, (context, answer) in enumerate(zip(examples['context'], examples['answers'])):
        start_position = None
        end_position = None

        if answer['answer_start']:
            start_idx = answer['answer_start'][0]
            end_idx = start_idx + len(answer['text'][0])

            offset_mapping = encodings['offset_mapping'][i]

            for j, (offset_start, offset_end) in enumerate(offset_mapping):
                if offset_start <= start_idx and offset_end > start_idx:
                    start_position = j
                if offset_start < end_idx and offset_end >= end_idx:
                    end_position = j
                    break

            if start_position is not None and end_position is not None:
                start_positions.append(start_position)
                end_positions.append(end_position)
            else:
                start_positions.append(0)
                end_positions.append(0)
        else:
            start_positions.append(0)
            end_positions.append(0)

    encodings.update({'start_positions': start_positions, 'end_positions': end_positions})
    return encodings


# Tokenize the dataset first
train_dataset = train_dataset.map(tokenize_function, batched=True)
eval_dataset = eval_dataset.map(tokenize_function, batched=True)

# P-tuning Configuration
prompt_encoder_config = PromptEncoderConfig(
    peft_type="P_TUNING",
    task_type="QUESTION_ANS",
    num_virtual_tokens=20,
    token_dim=768,
    num_transformer_submodules=1,
    num_attention_heads=12,
    num_layers=12,
    encoder_reparameterization_type="MLP",
    encoder_hidden_size=768,
)

prompt_encoder = PromptEncoder(prompt_encoder_config)

# Create the model
model = BertForQuestionAnswering.from_pretrained("bert-base-uncased")
peft_model = PeftModelForQuestionAnswering(model, prompt_encoder_config)
peft_model.print_trainable_parameters()

# Training arguments
training_args = TrainingArguments(
    evaluation_strategy="epoch",
    output_dir='./results',
    num_train_epochs=2,
    learning_rate=3e-5,
    per_device_train_batch_size=24,
    per_device_eval_batch_size=24,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
)

# Normalization Function
def normalize_answer(s):
    """Lower text and remove punctuation, articles, and extra whitespace."""
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))

# F1 Score Calculation
def f1_score(prediction, ground_truth):
    prediction_tokens = normalize_answer(prediction).split()
    ground_truth_tokens = normalize_answer(ground_truth).split()
    common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
    num_same = sum(common.values())

    if num_same == 0:
        return 0

    precision = 1.0 * num_same / len(prediction_tokens)
    recall = 1.0 * num_same / len(ground_truth_tokens)
    f1 = (2 * precision * recall) / (precision + recall)

    return f1

# Exact Match Score Calculation
def exact_match_score(prediction, ground_truth):
    return int(normalize_answer(prediction) == normalize_answer(ground_truth))

def compute_metrics(eval_pred):
    predictions_tuple, labels_tuple = eval_pred.predictions, eval_pred.label_ids
    start_logits, end_logits = predictions_tuple
    start_positions, end_positions = labels_tuple

    f1 = 0.0
    exact_match = 0

    for i in range(len(start_positions)):
        start_pred = start_logits[i].argmax()
        end_pred = end_logits[i].argmax()

        pred_ans = tokenizer.decode(eval_dataset[i]['input_ids'][start_pred:end_pred + 1])
        true_ans = tokenizer.decode(eval_dataset[i]['input_ids'][start_positions[i]:end_positions[i] + 1])

        f1 += f1_score(pred_ans, true_ans)
        exact_match += exact_match_score(pred_ans, true_ans)

    return {'f1': f1/len(start_positions), 'exact_match': exact_match/len(start_positions)}


# Trainer
trainer = Trainer(
    model=peft_model,  # Use the P-tuning-enhanced model
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics
)

# Fine-tune
trainer.train()

# Save the model
trainer.save_model("./bert_p_tuning_squad")

Map:   0%|          | 0/11873 [00:00<?, ? examples/s]

Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 1,790,212 || all params: 110,681,860 || trainable%: 1.6174393888935368


Epoch,Training Loss,Validation Loss,F1,Exact Match
1,4.0317,3.251485,0.506949,0.506949


In [None]:
from huggingface_hub import notebook_login

notebook_login()