In [1]:
from transformers import BartTokenizer, BartForQuestionAnswering
import torch



  from .autonotebook import tqdm as notebook_tqdm


In [15]:
# Initialize the BART tokenizer and model
tokenizer = BartTokenizer.from_pretrained( "valhalla/bart-large-finetuned-squadv1")
model = BartForQuestionAnswering.from_pretrained( "valhalla/bart-large-finetuned-squadv1")
# Define the context, question, and answer
context = "The capital of France is Paris."
question = "What is the capital of France?"
answer = "Paris"

context_tokens = tokenizer(context, return_tensors='pt')['input_ids'].view(-1)
question_tokens = tokenizer(question, return_tensors='pt')['input_ids'].view(-1)
answer_tokens = tokenizer(answer, return_tensors='pt')['input_ids'].view(-1)

print(f"Context tokens: {context_tokens}, \n"
      f"Question tokens: {question_tokens}, \n"
      f"Answer tokens: {answer_tokens}, \n",
      f"Padding token id: {tokenizer.pad_token_id}, \n",
      f"BOS token id: {tokenizer.bos_token_id}, \n",
      f"EOS token id: {tokenizer.eos_token_id}, \n",)

combined_tokens = torch.cat([context_tokens[:-1], question_tokens[1:-1], answer_tokens[1:]], dim=0)
context_question_tokens = torch.cat([context_tokens[:-1], question_tokens[1:]], dim=0).unsqueeze(0)
answer_start_idx = len(context_tokens) - 1 + len(question_tokens) - 2
answer_end_idx = answer_start_idx + len(answer_tokens) - 2

combined_tokens = combined_tokens.unsqueeze(0)
answer_start_idx = torch.tensor([answer_start_idx]).unsqueeze(0)
answer_end_idx = torch.tensor([answer_end_idx]).unsqueeze(0)

You passed along `num_labels=3` with an incompatible id to label map: {'0': 'LABEL_0', '1': 'LABEL_1'}. The number of labels wil be overwritten to 2.
You passed along `num_labels=3` with an incompatible id to label map: {'0': 'LABEL_0', '1': 'LABEL_1'}. The number of labels wil be overwritten to 2.


Context tokens: tensor([   0,  133,  812,    9, 1470,   16, 2201,    4,    2]), 
Question tokens: tensor([   0, 2264,   16,    5,  812,    9, 1470,  116,    2]), 
Answer tokens: tensor([    0, 32826,     2]), 
 Padding token id: 1, 
 BOS token id: 0, 
 EOS token id: 2, 



In [25]:
# Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

# Perform a forward pass and compute the loss
model.train()
outputs = model(input_ids=combined_tokens, start_positions=answer_start_idx, end_positions=answer_end_idx)
loss = outputs.loss

# Backpropagate the loss and perform an optimization step
loss.backward()
optimizer.step()

# Perform inference on the same data
model.eval()
with torch.no_grad():
    outputs = model(input_ids=context_question_tokens)

for key, value in outputs.__dict__.items():
    output_string = f"{key}: {value}" if not isinstance(value, torch.Tensor) else f"{key}: {value.shape}"
    # print(output_string)

In [44]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer, AdamW, get_linear_schedule_with_warmup
import torch
from tqdm.auto import tqdm

# Initialize the tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

# Set up the question-answer pair as a single string
question = "What is the capital of France?"
answer = "The capital of France is Paris."
text = question + " " + answer

# Encode the text, and return tensors
inputs = tokenizer.encode_plus(text, return_tensors="pt", add_special_tokens=True)

# Extract the input_ids and attention_mask
input_ids = inputs["input_ids"]
attention_mask = inputs["attention_mask"]

# Set up the optimizer and scheduler
optimizer = AdamW(model.parameters(), lr=1e-5)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=1)

# Move the model to the GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.train()
input_ids = input_ids.to(device)
attention_mask = attention_mask.to(device)
for i in tqdm(range(100)):
    # Zero the gradients
    optimizer.zero_grad()
    # Forward pass
    outputs = model(input_ids, attention_mask=attention_mask, labels=input_ids)

    # Get the loss
    loss = outputs.loss

    # Backward pass
    loss.backward()

    # Update weights
    optimizer.step()

    # Update the learning rate
    scheduler.step()
    print(loss.item())

# Do a prediction (inference)
model.eval()

# Let's ask the model the same question
input_ids = tokenizer.encode(question, return_tensors="pt").to(device)

# Generate a response
generated = model.generate(input_ids, max_length=50, num_beams=5, temperature=1.5, no_repeat_ngram_size=2)
generated_answer = tokenizer.decode(generated[:, input_ids.shape[-1]:][0], skip_special_tokens=True)

print(f"Generated answer: {generated_answer}")


  3%|▎         | 3/100 [00:00<00:04, 20.35it/s]

3.5292856693267822
2.5621626377105713
2.5013720989227295
2.8336524963378906
2.499972343444824


  8%|▊         | 8/100 [00:00<00:04, 19.78it/s]

2.569439172744751
2.568347454071045
2.5246505737304688
2.4872758388519287
2.0676064491271973


 14%|█▍        | 14/100 [00:00<00:04, 19.59it/s]

2.5730204582214355
2.6153359413146973
2.6455748081207275
2.724306344985962


 18%|█▊        | 18/100 [00:00<00:04, 19.59it/s]

2.322098970413208
3.0020980834960938
2.742515802383423
2.289710521697998
2.3434908390045166


 22%|██▏       | 22/100 [00:01<00:03, 19.54it/s]

2.7114408016204834
2.3565027713775635
2.7029733657836914
2.065701723098755


 26%|██▌       | 26/100 [00:01<00:03, 19.55it/s]

2.5974442958831787
2.4911653995513916
2.45788311958313
2.506070852279663


 30%|███       | 30/100 [00:01<00:03, 19.55it/s]

2.3668932914733887
2.6329641342163086
2.79579496383667
2.931165933609009


 34%|███▍      | 34/100 [00:01<00:03, 17.45it/s]

2.47560715675354
2.96134352684021
2.9773855209350586
2.551283121109009


 38%|███▊      | 38/100 [00:01<00:03, 18.52it/s]

2.604534387588501
2.386500120162964
2.4230661392211914
2.4694020748138428


 42%|████▏     | 42/100 [00:02<00:03, 16.89it/s]

2.2694835662841797
2.8106610774993896
2.2715163230895996
2.8672382831573486


 46%|████▌     | 46/100 [00:02<00:03, 15.87it/s]

2.6258745193481445
2.5032567977905273
2.563673973083496
2.29408597946167


 50%|█████     | 50/100 [00:02<00:03, 15.53it/s]

2.661142110824585
2.4053564071655273
2.5236589908599854
2.9230830669403076


 55%|█████▌    | 55/100 [00:03<00:02, 18.33it/s]

2.811673164367676
2.58351731300354
2.7362098693847656
2.2809839248657227
2.7075917720794678


 59%|█████▉    | 59/100 [00:03<00:02, 18.69it/s]

2.5175986289978027
2.688176155090332
2.836557626724243
2.2819600105285645


 63%|██████▎   | 63/100 [00:03<00:02, 17.51it/s]

2.1921603679656982
3.1061019897460938
2.3989274501800537
4.018486022949219


 67%|██████▋   | 67/100 [00:03<00:02, 16.19it/s]

2.7878267765045166
2.612661600112915
2.637922525405884
2.784435510635376


 71%|███████   | 71/100 [00:03<00:01, 15.57it/s]

2.3776395320892334
2.651190757751465
2.6549582481384277
2.4860148429870605


 75%|███████▌  | 75/100 [00:04<00:01, 15.38it/s]

2.716874122619629
2.9579555988311768
2.3414266109466553
2.7561819553375244


 79%|███████▉  | 79/100 [00:04<00:01, 17.20it/s]

2.276392698287964
2.6744790077209473
2.6531546115875244
2.9054059982299805
2.564725399017334


 85%|████████▌ | 85/100 [00:04<00:00, 18.41it/s]

2.2358267307281494
2.898979425430298
2.725952386856079
2.4379076957702637


 89%|████████▉ | 89/100 [00:04<00:00, 18.95it/s]

2.532442331314087
2.1732614040374756
2.5355582237243652
2.7551121711730957
2.9170961380004883


 93%|█████████▎| 93/100 [00:05<00:00, 19.23it/s]

2.461033582687378
2.6076159477233887
2.9964075088500977
2.7833333015441895


 97%|█████████▋| 97/100 [00:05<00:00, 19.39it/s]

2.504783868789673
2.8673207759857178
2.2754602432250977
3.0448973178863525


100%|██████████| 100/100 [00:05<00:00, 18.02it/s]
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


2.2931196689605713
2.8419203758239746
Generated answer: 

The capital is Paris. It is located in the heart of the French capital. The capital was founded in 1789, and it is still there today. In 1791, the city was divided into two
