In [None]:
import torch
from torch.utils.data import Dataset
from datasets import load_dataset

# Load the SQuAD v1 dataset once and reuse the returned splits; calling
# load_dataset() separately for each split resolves the whole dataset twice.
squad = load_dataset("squad")

# Number of leading examples kept from each split so the notebook runs quickly.
SUBSET_SIZE = 100
train_dataset = squad["train"].select(range(SUBSET_SIZE))
test_dataset = squad["validation"].select(range(SUBSET_SIZE))


In [None]:
class SquadDataset(Dataset):
    """Torch dataset that turns SQuAD-style examples into extractive-QA features.

    Each item is a tuple ``(inputs, answer)``: ``inputs`` is a dict of
    fixed-length token tensors plus the answer's start/end *token* indices, and
    ``answer`` is the first gold answer string of the example.
    """

    def __init__(self, dataset, tokenizer, max_length=384):
        """
        Args:
            dataset: Indexable collection of examples with ``context``,
                ``question`` and ``answers`` (``text`` / ``answer_start`` lists).
            tokenizer: Tokenizer exposing ``encode_plus`` and ``tokenize``.
            max_length: Length every encoded question+context pair is padded or
                truncated to.
        """
        self.tokenizer = tokenizer
        self.dataset = dataset
        self.max_length = max_length

    def __len__(self):
        return len(self.dataset)

    def _answer_token_span(self, context, answer, answer_start, segment_ids):
        """Convert the answer's character span into token indices of ``input_ids``.

        The token offset inside the context is obtained by counting the tokens
        of the text that precedes the answer and of the text up to the answer's
        last character. This is exact when the answer starts and ends on token
        boundaries and an approximation otherwise.

        Assumes the tokenizer marks context tokens with segment id 1 in
        ``token_type_ids`` (question and padding being 0) -- TODO confirm when
        swapping in a different tokenizer.

        Returns:
            ``(start, end)`` inclusive token indices, or ``(0, 0)`` when the
            answer is not fully inside the (possibly truncated) context window.
        """
        if 1 not in segment_ids:
            return 0, 0
        context_first = segment_ids.index(1)
        context_len = segment_ids.count(1)

        answer_end = answer_start + len(answer)
        # Strip trailing whitespace from the prefix: a tokenizer that attaches the
        # space to the following word would otherwise count it as an extra token.
        tokens_before = len(self.tokenizer.tokenize(context[:answer_start].rstrip()))
        tokens_through = len(self.tokenizer.tokenize(context[:answer_end]))

        start_token = tokens_before
        end_token = max(tokens_through - 1, start_token)
        if end_token >= context_len:
            # The answer was cut off by truncation.
            return 0, 0
        return context_first + start_token, context_first + end_token

    def __getitem__(self, idx):
        example = self.dataset[idx]
        context = example['context']
        question = example['question']
        answers = example['answers']
        has_answer = len(answers['text']) > 0
        answer = answers['text'][0] if has_answer else ''

        # Encode the question followed by the context as one fixed-length sequence.
        encoding = self.tokenizer.encode_plus(
            question,
            context,
            add_special_tokens=True,
            return_token_type_ids=True,
            return_attention_mask=True,
            padding='max_length',
            max_length=self.max_length,
            truncation=True
        )

        # answers['answer_start'] is a character offset into the context, so it
        # has to be mapped to token positions before it can serve as a label.
        start_token, end_token = 0, 0
        if has_answer:
            start_token, end_token = self._answer_token_span(
                context,
                answer,
                answers['answer_start'][0],
                list(encoding['token_type_ids']),
            )

        inputs = {
            'input_ids': torch.tensor(encoding['input_ids'], dtype=torch.long),
            'attention_mask': torch.tensor(encoding['attention_mask'], dtype=torch.long),
            'token_type_ids': torch.tensor(encoding['token_type_ids'], dtype=torch.long),
            # One class index per example (long scalar); default collation stacks
            # these into a (batch,) tensor usable as a CrossEntropyLoss target.
            'start_positions': torch.tensor(start_token, dtype=torch.long),
            'end_positions': torch.tensor(end_token, dtype=torch.long)
        }

        return inputs, answer

In [None]:
from transformers import GPT2Tokenizer
from torch.utils.data import DataLoader

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
# Reuse the end-of-sequence token as the padding token so that the dataset's
# padding='max_length' has a token to pad with.
tokenizer.pad_token = tokenizer.eos_token

# Build the training dataset once and hand the same object to the loader.
x = SquadDataset(train_dataset, tokenizer)

# Create the Dataloaders
train_dataloader = DataLoader(
    x,
    batch_size=16,
    shuffle=True
)
# Evaluation gains nothing from shuffling; a fixed order keeps runs comparable.
test_dataloader = DataLoader(
    SquadDataset(test_dataset, tokenizer),
    batch_size=16,
    shuffle=False
)

In [None]:
# Sanity check: take a single batch from the training loader, show the token
# ids of its encoded question+context pairs, then stop.
for batch in train_dataloader:
    first_batch_inputs = batch[0]
    print(f"Our context is:\n {first_batch_inputs['input_ids']}")
    break

In [None]:
# Choose where to run: CUDA if present, otherwise Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

In [None]:
from transformers import GPT2ForQuestionAnswering

# Load the pretrained "gpt2" checkpoint into the question-answering model
# class, then move it onto the selected device.
model = GPT2ForQuestionAnswering.from_pretrained("gpt2")
model = model.to(device)

In [None]:
learning_rate = 5e-5
epochs = 3
# transformers.AdamW is deprecated upstream in favour of the PyTorch
# implementation, so the optimizer is taken from torch.optim instead.
from torch.optim import AdamW

# eps and weight_decay are pinned to the values transformers.AdamW used by
# default, so switching implementations does not change the training dynamics.
optimizer = AdamW(model.parameters(), lr=learning_rate, eps=1e-6, weight_decay=0.0)

In [None]:
def train_loop(dataloader, model, optimizer):
    """Run one training epoch of extractive question answering.

    Args:
        dataloader: Iterable of ``(inputs, answer)`` batches; ``inputs`` must
            hold ``input_ids``, ``attention_mask``, ``token_type_ids``,
            ``start_positions`` and ``end_positions`` tensors.
        model: Module whose forward output exposes ``start_logits`` and
            ``end_logits``.
        optimizer: Optimizer updating ``model``'s parameters.

    Returns:
        The mean loss over all batches seen, or ``nan`` if the dataloader
        yielded no batches.
    """
    # set the model to training mode
    model.train()

    # Follow the model's own placement rather than a notebook-level global.
    model_device = next(model.parameters()).device
    # Built once and reached through ``torch`` so this function does not depend
    # on an ``nn`` alias being imported by some other cell.
    criterion = torch.nn.CrossEntropyLoss()

    running_loss = 0.0
    num_batches = 0

    for batch in dataloader:
        optimizer.zero_grad()

        # batch[0] is the feature dict; batch[1] (answer text) is not needed here.
        features = batch[0]
        input_ids = features['input_ids'].to(model_device)
        attention_mask = features['attention_mask'].to(model_device)
        token_type_ids = features['token_type_ids'].to(model_device)
        start_positions = features['start_positions'].to(model_device)
        end_positions = features['end_positions'].to(model_device)

        # get outputs from model
        outputs = model(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)

        # average the start-position and end-position losses
        loss_start = criterion(outputs.start_logits, start_positions)
        loss_end = criterion(outputs.end_logits, end_positions)
        loss = (loss_start + loss_end) / 2

        # backpropagation
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    return running_loss / num_batches if num_batches else float("nan")
        

def test_loop(dataloader, model):
    # set the model of evaluation
    model.eval()
    val_loss = 0
    
    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
    with torch.no_grad():
        for batch in dataloader:
            # previous tokens
            input_ids = batch[0]['input_ids'].to(device)
            attention_mask = batch[0]['attention_mask'].to(device)
            token_type_ids = batch[0]['token_type_ids'].to(device)
            start_positions = batch[0]['start_positions'].to(device)
            end_positions = batch[0]['end_positions'].to(device)
            answer = batch[0]['answer'].to(device)

            labels = {
                'start_positions': start_positions,
                'end_positions': end_positions
            }

           # get outputs from model
            inputs = tokenizer(question, text, return_tensors="pt")
            with torch.no_grad():
                outputs = model(**inputs)
            #outputs = model(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
            answer_start_index = outputs.start_logits.argmax()
            answer_end_index = outputs.end_logits.argmax()

            predict_answer_tokens = input_ids[0, answer_start_index : answer_end_index + 1]

            # target is "nice puppet"
            target_start_index = torch.tensor([14])
            target_end_index = torch.tensor([15])

            outputs = model(input_ids, start_positions=target_start_index, end_positions=target_end_index)
            loss = outputs.loss
            # calculate loss
            '''loss_start = nn.CrossEntropyLoss()(outputs.start_logits, start_positions)
            loss_end = nn.CrossEntropyLoss()(outputs.end_logits, end_positions)
            loss = (loss_start + loss_end) / 2  # average loss for start and end positions
            
            val_loss += loss.item()
    
    # Print the validation loss for this epoch
    print(f"Validation Loss: {val_loss/len(dataloader)}")'''
    return outputs

In [None]:
import transformers
import torch.nn as nn
from tqdm import tqdm
# Only surface errors from the transformers logger while training runs.
transformers.logging.set_verbosity_error()

# Run one pass over the training data per epoch, announcing each epoch first.
for epoch_number in tqdm(range(1, epochs + 1)):
    print(f"Epoch {epoch_number}\n ---------------------------")
    train_loop(train_dataloader, model, optimizer)

print("Done!")

In [None]:

# Write the fine-tuned model to a directory relative to the working directory.
output_dir = 'fine_tuned_gpt_model'
model.save_pretrained(output_dir)