In [None]:
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import GPT2LMHeadModel, GPT2Tokenizer, AdamW

In [None]:
# Toy fine-tuning corpus: each entry is a (question, one-word answer) pair.
questions_answers = [
    ("What is the color of the sky?", "Blue"),
    ("What do bees make?", "Honey"),
    ("What is the largest planet?", "Jupiter"),
    ("Who wrote Hamlet?", "Shakespeare"),
    ("What liquid do cars need?", "Fuel"),
    ("What is frozen water called?", "Ice"),
    ("Which animal is known as man's best friend?", "Dog"),
    ("What do we breathe?", "Air"),
    ("What color is a ruby?", "Red"),
    ("What do bees produce?", "Honey"),
    ("What is the opposite of cold?", "Hot"),
    ("What do we call a baby cat?", "Kitten"),
    ("What do you use to write on a blackboard?", "Chalk"),
    ("What is the capital of France?", "Paris"),
    ("What fruit is known for its potassium?", "Banana"),
    ("What is the hardest natural substance?", "Diamond"),
    ("What season follows summer?", "Autumn"),
    ("What is the currency of the USA?", "Dollar"),
    ("What is the primary language in Spain?", "Spanish"),
    ("What is the color of grass?", "Green"),
]

In [None]:
class QADataset(Dataset):
    """Dataset that serves (question, answer) pairs as tokenized tensors.

    Each pair is joined into a single "<question> <answer>" string and is
    tokenized lazily, at the moment the item is requested.
    """

    def __init__(self, tokenizer, qa_list, max_length):
        """Store the tokenizer, the QA pairs and the target sequence length.

        Args:
            tokenizer: Callable tokenizer; it is invoked with truncation,
                ``padding="max_length"`` and ``return_tensors="pt"``.
            qa_list: Indexable collection of (question, answer) pairs.
            max_length: Value forwarded to the tokenizer as ``max_length``.
        """
        self.max_length = max_length
        self.qa_list = qa_list
        self.tokenizer = tokenizer

    def __len__(self):
        """Return the number of question/answer pairs."""
        return len(self.qa_list)

    def __getitem__(self, idx):
        """Tokenize pair ``idx`` and return ``(input_ids, attention_mask)``."""
        pair = self.qa_list[idx]
        question, answer = pair
        text = f"{question} {answer}"
        encoded = self.tokenizer(
            text,
            truncation=True,
            max_length=self.max_length,
            padding="max_length",
            return_tensors="pt",
        )
        # Index 0 selects the single encoded sequence from each returned field.
        token_ids = encoded.input_ids[0]
        attention = encoded.attention_mask[0]
        return token_ids, attention

In [None]:
# Load the pretrained "gpt2" checkpoint and its matching tokenizer.
MODEL_NAME = "gpt2"
model = GPT2LMHeadModel.from_pretrained(MODEL_NAME)
tokenizer = GPT2Tokenizer.from_pretrained(MODEL_NAME)
# Reuse the end-of-sequence token for padding; QADataset pads every example
# with padding="max_length", which requires the tokenizer to have a pad token.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Maximum sequence length handed to the tokenizer for every example.
max_length = 32
dataset = QADataset(tokenizer=tokenizer, qa_list=questions_answers, max_length=max_length)
# batch_size is larger than the 20 QA pairs, so each epoch is one shuffled batch.
dataloader = DataLoader(dataset=dataset, batch_size=32, shuffle=True)

In [None]:
# Use PyTorch's AdamW: the AdamW class exported by transformers is deprecated
# (and no longer present in recent transformers releases).  eps and
# weight_decay are pinned to the values the transformers implementation used
# by default so the optimisation behaves as it did before the switch.
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5, eps=1e-6, weight_decay=0.0)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)



GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [None]:
# Fine-tune the model on the QA pairs with a causal language-modelling loss.
num_epochs = 48
for epoch in range(num_epochs):
    model.train()  # training mode (dropout active)
    for batch in dataloader:
        inputs, masks = (tensor.to(device) for tensor in batch)

        # Build the targets.  Using the raw inputs as labels would score every
        # padding position, so padding would dominate the loss.  Positions
        # labelled -100 are ignored by the model's loss.  The FIRST padding
        # token of each row is kept as a target: the pad token is the EOS
        # token, so it is what teaches the model to stop after the answer.
        # NOTE: assumes right-side padding (padding follows the real tokens).
        is_pad = masks == 0
        beyond_first_pad = is_pad & (is_pad.long().cumsum(dim=1) > 1)
        labels = inputs.masked_fill(beyond_first_pad, -100)

        outputs = model(inputs, attention_mask=masks, labels=labels)
        loss = outputs.loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print(f"Epoch: {epoch}, Loss: {loss.item()}")

In [None]:
def generate_answer(question, model, tokenizer, max_length=50):
    """Generate a continuation of ``question`` with ``model`` and return it as text.

    Args:
        question: Prompt text fed to the model.
        model: Language model exposing ``generate`` and a ``device`` attribute
            (e.g. the fine-tuned ``GPT2LMHeadModel``).
        tokenizer: Tokenizer matching ``model``; must provide ``eos_token_id``
            and ``decode``.
        max_length: Forwarded to ``model.generate`` as ``max_length``
            (the limit on total sequence length, prompt included).

    Returns:
        The first generated sequence decoded with special tokens skipped.
        The text begins with the prompt itself, followed by the model's answer.
    """
    # Put the inputs on the same device as the model that was passed in,
    # instead of depending on a module-level ``device`` variable.
    target_device = model.device

    # Tokenize the prompt; keep the attention mask so ``generate`` does not
    # have to guess it (the pad token and the EOS token are the same id here).
    encoded = tokenizer(question, return_tensors="pt")
    input_ids = encoded["input_ids"].to(target_device)
    attention_mask = encoded["attention_mask"].to(target_device)

    # Generate the output (prompt + answer tokens) using the model.
    output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=max_length,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode the first (only) returned sequence to human-readable text.
    answer = tokenizer.decode(output[0], skip_special_tokens=True)

    return answer

# Example usage: switch to evaluation mode, then ask a question that is not in the training list.
model.eval()
question = "what is the captial city of America?"
answer = generate_answer(question=question, model=model, tokenizer=tokenizer)
print(answer)

what is the captial city of America? Chicago


In [None]:
# Second probe; evaluation mode is set again so the cell also works on its own.
model.eval()
question = "what is the color of blood?"
answer = generate_answer(question=question, model=model, tokenizer=tokenizer)
print(answer)

what is the color of blood? Red
