In [1]:
import torch
from transformers import BertTokenizerFast, BertForQuestionAnswering
from torch.utils.data import DataLoader


In [2]:
# Both the network and its tokenizer come from the same pretrained checkpoint.
# The question-answering head (qa_outputs.*) is not part of that checkpoint and
# is randomly initialised, which is what the warning printed below reports.
checkpoint = "bert-base-uncased"
model = BertForQuestionAnswering.from_pretrained(checkpoint)
tokenizer = BertTokenizerFast.from_pretrained(checkpoint)

Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
# Toy training set, written as (context, question, answer) triples and then
# expanded into one dict per example.
qa_triples = [
    (
        'The sky is blue and the grass is green.',
        'What color is the sky?',
        'blue',
    ),
    (
        'The sky is blue and the grass is green.',
        'What color is the sky?',
        'blue',
    ),
    # Add more question-answer pairs
]

data = [
    {'context': context_text, 'question': question_text, 'answer': answer_text}
    for context_text, question_text, answer_text in qa_triples
]

In [4]:
# Encode every (question, context) pair and collect the gold answer strings.
# Each encoding is truncated/padded to exactly 512 tokens so that all examples
# have the same length.
inputs = []
answers = []
for qa in data:
    encoded_pair = tokenizer(
        qa['question'],
        qa['context'],
        truncation=True,
        padding='max_length',
        max_length=512,
    )
    inputs.append(encoded_pair)
    answers.append(qa['answer'])

In [5]:
# Convert data into torch tensors (in place, one tensor per encoding field).
# The loop variable is called `encoding` rather than `input` so the builtin
# `input()` is not shadowed, and `torch.as_tensor` is used so that re-running
# this cell on already-converted encodings is a no-op instead of triggering
# the "copy construct from a tensor" warning raised by `torch.tensor`.
for encoding in inputs:
    # list(...) snapshots the keys because values are reassigned while looping.
    for key in list(encoding.keys()):
        encoding[key] = torch.as_tensor(encoding[key])

In [6]:
# Get start and end positions of answers
def find_answer_span(input_ids, token_type_ids, answer_ids):
    """Locate ``answer_ids`` inside the context part of an encoded pair.

    The search is limited to the context segment and requires the *whole*
    answer token sequence to match. Matching only the first answer token over
    the full input would pick up occurrences in the question, or partial
    matches earlier in the context.

    Args:
        input_ids: list of token ids of the encoded (question, context) pair.
        token_type_ids: list of segment ids aligned with ``input_ids``; the
            value 1 marks the second sequence, which is the context because
            the question is passed to the tokenizer first.
        answer_ids: list of token ids of the answer, without special tokens.

    Returns:
        ``(start, end)`` inclusive token indices of the first full match.

    Raises:
        ValueError: if the answer is empty, the encoding has no context
            segment, or the answer does not occur in the context (for
            example because it was truncated away).
    """
    if not answer_ids:
        raise ValueError('answer produced no tokens')
    if 1 not in token_type_ids:
        raise ValueError('encoding has no context segment')

    span_len = len(answer_ids)
    context_start = token_type_ids.index(1)
    for start in range(context_start, len(input_ids) - span_len + 1):
        if input_ids[start:start + span_len] == answer_ids:
            return start, start + span_len - 1
    raise ValueError('answer tokens not found in the context')


for encoding, answer in zip(inputs, answers):
    # Tokenise the answer once; it is needed for both the match and the length.
    answer_ids = tokenizer.encode(answer, add_special_tokens=False)
    start, end = find_answer_span(
        encoding['input_ids'].tolist(),
        encoding['token_type_ids'].tolist(),
        answer_ids,
    )
    encoding['start_positions'] = torch.tensor(start)
    encoding['end_positions'] = torch.tensor(end)

In [7]:
# Batch the encoded examples for training: up to 8 per batch, in list order
# (no shuffling is requested).
batch_size = 8
dataloader = DataLoader(dataset=inputs, batch_size=batch_size)

In [8]:
# Training setup: pick the device, move the model there, switch it to
# training mode and create the optimizer.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.train()

learning_rate = 1e-5
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [9]:
# Number of full passes over the dataloader. The previous inline comment said
# "3 epochs" while the loop actually ran 30; the count is now named once.
NUM_EPOCHS = 30

# Make sure dropout etc. are in training mode even if this cell is re-run
# after the model has been used for inference.
model.train()

for epoch in range(NUM_EPOCHS):
    for batch in dataloader:
        # Move every tensor of the batch to the same device as the model
        batch = {key: value.to(device) for key, value in batch.items()}

        # Forward pass; the loss is returned because the batch contains
        # start_positions and end_positions
        outputs = model(**batch)

        # Backward pass and optimization
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    # Reports the loss of the last batch of the epoch, not an epoch average
    print(f'Epoch {epoch+1} Loss {loss.item()}')

Epoch 1 Loss 6.332866668701172
Epoch 2 Loss 6.167593002319336
Epoch 3 Loss 5.922531604766846
Epoch 4 Loss 5.587536811828613
Epoch 5 Loss 5.358992099761963
Epoch 6 Loss 5.095773220062256
Epoch 7 Loss 4.771237373352051
Epoch 8 Loss 4.642463684082031
Epoch 9 Loss 4.518061637878418
Epoch 10 Loss 4.051026344299316
Epoch 11 Loss 3.873443603515625
Epoch 12 Loss 3.8055920600891113
Epoch 13 Loss 3.318305492401123
Epoch 14 Loss 3.12795352935791
Epoch 15 Loss 2.925119161605835
Epoch 16 Loss 2.6708922386169434
Epoch 17 Loss 2.5038745403289795
Epoch 18 Loss 2.259643077850342
Epoch 19 Loss 2.055074691772461
Epoch 20 Loss 1.8324575424194336
Epoch 21 Loss 1.6323204040527344
Epoch 22 Loss 1.4868766069412231
Epoch 23 Loss 1.3070985078811646
Epoch 24 Loss 1.1371068954467773
Epoch 25 Loss 0.8396139144897461
Epoch 26 Loss 0.7627565264701843
Epoch 27 Loss 0.7226934432983398
Epoch 28 Loss 0.6475984454154968
Epoch 29 Loss 0.5844744443893433
Epoch 30 Loss 0.4695398509502411


In [10]:
# Persist the fine-tuned model to a local directory. Only the model is saved
# here; the tokenizer is not written alongside it.
output_dir = 'my_qa_model'
model.save_pretrained(output_dir)

In [11]:
context = "The sky is blue and the grass is green."
question = "What color is the grass?"

# Prepare the question and the context for the model. The encoded tensors are
# moved to `device` because the model was moved there before training; leaving
# them on the CPU fails with a device-mismatch error when CUDA is in use.
inputs = tokenizer(question, context, return_tensors='pt').to(device)

# Run the model in evaluation mode (dropout disabled) and without tracking
# gradients, so the prediction is deterministic and no autograd graph is kept.
model.eval()
with torch.no_grad():
    outputs = model(**inputs)

In [12]:
# Take the highest-scoring position from the start logits and from the end
# logits; each result is a 0-dim tensor holding a token index.
answer_start = outputs.start_logits.argmax()
answer_end = outputs.end_logits.argmax()

In [13]:
# Turn the predicted token span back into text: take the ids of the single
# encoded example, slice the inclusive [answer_start, answer_end] range, and
# join the resulting tokens into a string.
input_ids = inputs["input_ids"][0].tolist()
answer_token_ids = input_ids[answer_start:answer_end + 1]
answer_tokens = tokenizer.convert_ids_to_tokens(answer_token_ids)
answer = tokenizer.convert_tokens_to_string(answer_tokens)

In [14]:
# Show the decoded answer span predicted for the question above.
print("Answer: ", answer) 

Answer:  blue
