In [4]:
# Install required libraries
!pip install transformers datasets

# Import libraries
from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments
from datasets import Dataset


import pandas as pd



In [5]:

# Prepare the dataset
data = [
    {"text": "Riddle: What number becomes zero when you subtract 15 from half of it? Solution: 30"},
    {"text": "Riddle: I am an odd number. Take away a letter and I become even. What number am I? Solution: Seven"},
    {"text": "Riddle: If you multiply me by any other number, the answer will always be the same. What number am I? Solution: Zero"},
    {"text": "Riddle: What three positive numbers give the same result when multiplied and added together? Solution: 1, 2, 3"},
    {"text": "Riddle: I am a number. If you add 5 to me and then multiply by 3, the result is 36. What number am I? Solution: 7"},
    {"text": "Riddle: What is the smallest whole number that is equal to seven times the sum of its digits? Solution: 21"},
    {"text": "Riddle: If you divide 30 by half and add 10, what is the result? Solution: 70"},
    {"text": "Riddle: What number is twice the sum of its digits? Solution: 18"},
    {"text": "Riddle: I am a number. If you double me and subtract 8, you get 12. What number am I? Solution: 10"},
    {"text": "Riddle: What is the next number in the sequence: 2, 4, 8, 16, ___? Solution: 32"},
    {"text": "Riddle: What number is the same as its double? Solution: 0"},
    {"text": "Riddle: I am a number. If you add 10 to me, I become 20. What number am I? Solution: 10"},
    {"text": "Riddle: What is the only even prime number? Solution: 2"},
    {"text": "Riddle: I am a number. If you square me, I become 16. What number am I? Solution: 4"},
    {"text": "Riddle: What is the smallest number that is divisible by both 6 and 8? Solution: 24"},
    {"text": "Riddle: I am a number. If you add 7 to me, I become 21. What number am I? Solution: 14"},
    {"text": "Riddle: What is the next number in the sequence: 1, 1, 2, 3, 5, 8, ___? Solution: 13"},
    {"text": "Riddle: I am a number. If you subtract 5 from me, I become 10. What number am I? Solution: 15"},
    {"text": "Riddle: What is the smallest number that is a multiple of both 3 and 5? Solution: 15"},
    {"text": "Riddle: I am a number. If you multiply me by 4, I become 20. What number am I? Solution: 5"},
    {"text": "Riddle: What is the next number in the sequence: 3, 6, 9, 12, ___? Solution: 15"},
    {"text": "Riddle: I am a number. If you add 12 to me, I become 24. What number am I? Solution: 12"},
    {"text": "Riddle: What is the smallest number that is divisible by both 4 and 7? Solution: 28"},
    {"text": "Riddle: I am a number. If you subtract 8 from me, I become 12. What number am I? Solution: 20"},
    {"text": "Riddle: What is the next number in the sequence: 10, 20, 30, 40, ___? Solution: 50"},
    {"text": "Riddle: I am a number. If you multiply me by 5, I become 25. What number am I? Solution: 5"},
    {"text": "Riddle: What is the smallest number that is divisible by both 9 and 12? Solution: 36"},
    {"text": "Riddle: I am a number. If you add 15 to me, I become 30. What number am I? Solution: 15"},
    {"text": "Riddle: What is the next number in the sequence: 5, 10, 20, 40, ___? Solution: 80"},
    {"text": "Riddle: I am a number. If you subtract 10 from me, I become 5. What number am I? Solution: 15"},
]

# Convert to Hugging Face Dataset
dataset = Dataset.from_dict({"text": [item["text"] for item in data]})

df = pd.DataFrame(data)

df.to_csv("math_riddles_dataset.csv", index=False)

print("Dataset saved as 'math_riddles_dataset.csv'")

Dataset saved as 'math_riddles_dataset.csv'


In [13]:
# Load GPT-2 tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

# Set the pad_token to the eos_token
tokenizer.pad_token = tokenizer.eos_token

# Tokenize the dataset
def tokenize_function(examples):
    # Tokenize the input text
    tokenized_output = tokenizer(examples["text"], padding="max_length", truncation=True, max_length=64)
    # Add labels for the model to compute loss
    tokenized_output["labels"] = tokenized_output["input_ids"]
    return tokenized_output

tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Set up training arguments
training_args = TrainingArguments(
    output_dir="./results",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=2,
    save_steps=10_000,
    save_total_limit=2,
    report_to="none",  # Disable wandb logging
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

# Fine-tune the model
trainer.train()

# Save the fine-tuned model
trainer.save_model("math_riddle_generator")
tokenizer.save_pretrained("math_riddle_generator")

Map:   0%|          | 0/30 [00:00<?, ? examples/s]

`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Step,Training Loss


('math_riddle_generator/tokenizer_config.json',
 'math_riddle_generator/special_tokens_map.json',
 'math_riddle_generator/vocab.json',
 'math_riddle_generator/merges.txt',
 'math_riddle_generator/added_tokens.json')

In [14]:
# Load the fine-tuned model and tokenizer
from transformers import pipeline

generator = pipeline("text-generation", model="math_riddle_generator", tokenizer=tokenizer)

# Generate 5 riddles
for _ in range(5):
    output = generator("Riddle:", max_length=50, num_return_sequences=1)
    print(output[0]["generated_text"])
    print("---")

Device set to use cuda:0
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Riddle: I am a number. What number am I? Solution: 24
---
Riddle: What is the smallest integer that is divisible by 2? Solution: 4
---
Riddle: What is the smallest number and a positive integer? Solution: 10
---
Riddle: We can say that three people have the same name. If one person's first name is "John, the other two are Patrick and me." What number do you multiply by three? Solution: 8
---
Riddle: If a person is given a number, the number is equal to that number, while half of that number is in addition to that number. If they add 8 to the number, their result is 20. What number am I? Solution
---


**From the above generated 5 examples, riddle 2 makes sense**

In [21]:
generator = pipeline("text-generation", model="math_riddle_generator", tokenizer=tokenizer)

# Generate 5 riddles
for _ in range(5):
    output = generator("Riddle:", max_length=50, num_return_sequences=1)
    print(output[0]["generated_text"])
    print("---")

Device set to use cuda:0


Riddle: What is the smallest number that will equal one integer. Solution: 21
---
Riddle: Do people in the UK and in the north of the United States have two different numbers and are the same length? Solution: 5
---
Riddle: What is the smallest number that is divisible by 2 times its product? Solution: 7
---
Riddle: What is the most recent number from all the digits in the sequence: 4, 8, 16, ___? Solution: 16
---
Riddle: What is the sum of all my free, uni-collected digits? Solution: 40
---


**From the above generated 5 examples, riddle 4 makes sense**