<H1> Fine-Tuning a LLM for Haiku Structure </H1>

In [None]:
from datasets import load_dataset
from transformers import GPT2Tokenizer, GPT2LMHeadModel, TrainingArguments, Trainer
import os

# Load your haiku dataset
dataset = load_dataset('text', data_files={'train': 'haiku.txt'})

# Tokenize the dataset
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 doesn't have a pad token

def tokenize_function(examples):
    tokens = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=50)
    tokens["labels"] = tokens["input_ids"].copy()  # Add labels for loss computation
    return tokens
tokenized_datasets = dataset.map(tokenize_function, batched=True)


: 

In [None]:
# Load the GPT-2 model
model = GPT3LMHeadModel.from_pretrained("gpt2")

# Adjust GPT-2 to handle padding
model.resize_token_embeddings(len(tokenizer))

In [None]:
training_args = TrainingArguments(
    output_dir="./gpt2_haiku_model",  # Save directory
    overwrite_output_dir=True,
    num_train_epochs=3,              # Number of epochs
    per_device_train_batch_size=8,   # Adjust based on GPU memory
    per_device_eval_batch_size=8,
    save_steps=500,
    save_total_limit=2,
    logging_dir="./logs",            # Log directory
    logging_steps=10,
    evaluation_strategy="no",
    learning_rate=5e-5,              # Adjust learning rate if needed
    weight_decay=0.01,
    do_eval=False
)

In [None]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    tokenizer=tokenizer
)

In [None]:
# Start training
trainer.train()

# Save the fine-tuned model
model.save_pretrained("./gpt2_finetuned_haiku")
tokenizer.save_pretrained("./gpt2_finetuned_haiku")

<h2> Testing

In [2]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the fine-tuned model
model = GPT2LMHeadModel.from_pretrained("./gpt2_finetuned_haiku")
tokenizer = GPT2Tokenizer.from_pretrained("./gpt2_finetuned_haiku")

def generate_haiku(prompt, model, tokenizer):
    inputs = tokenizer.encode(prompt, return_tensors="pt")
    outputs = model.generate(
        inputs, 
        max_length=50, 
        num_return_sequences=1, 
        temperature=0.7, 
        top_k=50
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

prompt = "sunset over the mountain"
haiku = generate_haiku(prompt, model, tokenizer)
print("Generated Haiku:", haiku)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Generated Haiku: sunset over the mountain /  the sun sets over the valley /  the moon sets over me $


<h2> Reward Function 

<h2> RL Training

In [2]:
from stable_baselines3 import PPO
from HaikuRefinerEnv_v0 import HaikuEnvironment

env = HaikuEnvironment()
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=10000)

# Evaluate
obs = env.reset()
done = False
while not done:
    action, _ = model.predict(obs)
    obs, reward, done, info = env.step(action)

print("Generated Haiku:", env.haiku)

AssertionError: nvec (counts) have to be positive