In [3]:
!pip install trl



In [4]:
import torch
import os
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from peft import LoraConfig, get_peft_model, TaskType
from datasets import DatasetDict, Dataset
from trl import SFTTrainer

In [5]:
# Check and Set CUDA
print("PyTorch Version:", torch.__version__)
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
print("CUDA Available:", torch.cuda.is_available())

PyTorch Version: 2.5.1+cu121
CUDA Available: True


In [6]:
# Load Dataset
df = pd.read_csv("/kaggle/input/dataset1/math_riddles.csv")

# Split Dataset (80% train, 10% validation, 10% test)
def split_dataset(df):
    train_size = int(0.8 * len(df))  # 5 samples
    val_size = int(0.1 * len(df))    # 1 sample
    test_size = len(df) - train_size - val_size  # 1 sample
    
    train_df = df[:train_size]
    val_df = df[train_size:train_size + val_size]
    test_df = df[train_size + val_size:]
    
    return train_df, val_df, test_df

train_df, val_df, test_df = split_dataset(df)
dataset = DatasetDict({
    "train": Dataset.from_pandas(train_df),
    "validation": Dataset.from_pandas(val_df),
    "test": Dataset.from_pandas(test_df)
})

In [7]:
# Load Tokenizer and Model
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained("gpt2", torch_dtype=torch.float16, device_map="auto")

# Preprocessing: Format as full prompts
def preprocess_function(examples):
    prompts = [f"Math Riddle: {riddle}\nAnswer: {answer}" for riddle, answer in zip(examples["riddle"], examples["answer"])]
    return tokenizer(prompts, truncation=True, padding="max_length", max_length=128)

tokenized_datasets = dataset.map(preprocess_function, batched=True)

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Map:   0%|          | 0/24 [00:00<?, ? examples/s]

Map:   0%|          | 0/3 [00:00<?, ? examples/s]

Map:   0%|          | 0/3 [00:00<?, ? examples/s]

In [8]:
# LoRA Configuration
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    target_modules=["c_attn", "c_proj"]  # Focus on attention layers for GPT-2
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 811,008 || all params: 125,250,816 || trainable%: 0.6475




In [9]:
# Training Arguments
training_args = TrainingArguments(
    output_dir="/kaggle/working/riddle_model",
    per_device_train_batch_size=2,
    num_train_epochs=8,  # Matches assignment requirement
    save_strategy="epoch",
    logging_dir="/kaggle/working/logs",
    report_to="none",
    eval_strategy="epoch",  # Updated from evaluation_strategy
    fp16=True,
    logging_steps=1  # Log frequently due to small dataset
)

# Trainer Setup
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    processing_class=tokenizer,  # Updated from tokenizer
)

# Train
model.train()
trainer.train()

# Save Model
trainer.model.save_pretrained("/kaggle/working/trained-model")

Converting train dataset to ChatML:   0%|          | 0/24 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/24 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/24 [00:00<?, ? examples/s]

Converting eval dataset to ChatML:   0%|          | 0/3 [00:00<?, ? examples/s]

Applying chat template to eval dataset:   0%|          | 0/3 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/3 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss
1,4.4034,3.982834
2,3.4569,3.887849
3,4.3546,3.765164
4,3.2424,3.643247
5,3.62,3.527553
6,3.436,3.435995
7,3.4261,3.377908
8,3.3823,3.360038


In [10]:
# Generate 5 Riddles
def generate_riddle(prompt):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=50,
        temperature=0.7,
        top_p=0.9,
        do_sample=True  # Enable sampling for creativity
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

print("\nGenerated Riddles:")
generated_riddles = [generate_riddle("Math Riddle: ") for _ in range(5)]
for i, riddle in enumerate(generated_riddles):
    print(f"Riddle {i+1}: {riddle}")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Generated Riddles:


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Riddle 1: Math Riddle:  How do you find out if you've been on a train for 5 minutes or less?  How do you determine if you're going to be able to take a break?  How do you find out if you're going to be able to
Riddle 2: Math Riddle:  Is there any way to answer the question of how many times we have done this before?  Is there any way to answer the question of how many times we have done this before?  Or is there a way to answer the question
Riddle 3: Math Riddle:  The following is a description of a simple riddle (a "bunny" in the words) that I have found to be very difficult to solve. I have been trying to solve this riddle for the past year.
The answer is
Riddle 4: Math Riddle:  (1)  What is the probability of a dog being killed by a human?  (2)  What is the probability of a dog being killed by a human?  (3)  What is the probability
Riddle 5: Math Riddle:  Can we have an answer to the puzzle of how to make a bullet out of a bullet?  Answer:  I don't know.  I have been working on this

In [11]:
# Basic Evaluation (Manual Check)
print("\nManual Evaluation:")
for riddle in generated_riddles:
    try:
        riddle_text = riddle.split("Answer:")[0].strip()
        answer = riddle.split("Answer:")[1].strip()
        print(f"Riddle: {riddle_text}")
        print(f"Stated Answer: {answer}")
        # Add manual verification logic here if needed
    except IndexError:
        print(f"Invalid format: {riddle}")


Manual Evaluation:
Invalid format: Math Riddle:  How do you find out if you've been on a train for 5 minutes or less?  How do you determine if you're going to be able to take a break?  How do you find out if you're going to be able to
Invalid format: Math Riddle:  Is there any way to answer the question of how many times we have done this before?  Is there any way to answer the question of how many times we have done this before?  Or is there a way to answer the question
Invalid format: Math Riddle:  The following is a description of a simple riddle (a "bunny" in the words) that I have found to be very difficult to solve. I have been trying to solve this riddle for the past year.
The answer is
Invalid format: Math Riddle:  (1)  What is the probability of a dog being killed by a human?  (2)  What is the probability of a dog being killed by a human?  (3)  What is the probability
Riddle: Math Riddle:  Can we have an answer to the puzzle of how to make a bullet out of a bullet?
Stated Ans