<H1> Fine-Tuning LLM for Haiku Structure </H1>
References: <br>
https://github.com/davanstrien/haiku-dpo <br>
https://github.com/glakshay/Generating-Haiku-using-GAN/tree/master/dataset


In [1]:
from transformers import (
    DataCollatorForLanguageModeling,
    GPT2LMHeadModel,
    GPT2Tokenizer,
    Trainer,
    TrainingArguments,
)
from datasets import load_dataset

# Haiku are short; capping the padded length keeps batches far smaller than
# GPT-2's default 1024-token context that padding="max_length" would otherwise use.
MAX_SEQ_LENGTH = 64

# Load the pre-trained GPT-2 model and tokenizer
model = GPT2LMHeadModel.from_pretrained("gpt2")
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# GPT-2 ships without a padding token, so padding="max_length" would raise.
# Reusing EOS as the pad token is the usual workaround for causal LMs.
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.pad_token_id

# Load your haiku dataset (custom or from a source like Kaggle)
dataset = load_dataset("path_to_haiku_dataset")

# Not every dataset ships an evaluation split; carve one out of "train" so the
# eval_dataset lookup below cannot fail with a KeyError.
if "test" not in dataset:
    dataset = dataset["train"].train_test_split(test_size=0.1, seed=42)


# Tokenize the dataset
def tokenize_function(examples):
    """Tokenize a batch of examples, padding/truncating to MAX_SEQ_LENGTH.

    Assumes the dataset exposes the poem under a 'text' column -- TODO confirm
    against the dataset actually used.
    """
    return tokenizer(
        examples['text'],
        padding="max_length",
        truncation=True,
        max_length=MAX_SEQ_LENGTH,
    )


tokenized_datasets = dataset.map(tokenize_function, batched=True)

# With mlm=False the collator copies input_ids into `labels` (ignoring padding),
# which is what gives the Trainer a causal-LM loss to optimise.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Set up the training arguments
training_args = TrainingArguments(
    output_dir="./gpt2_haiku_model", # Where to save the fine-tuned model
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    logging_dir='./logs',  # For logging
)

# Set up the trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    data_collator=data_collator,
)

# Train the model
trainer.train()


  from .autonotebook import tqdm as notebook_tqdm
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


ImportError: 
GPT2LMHeadModel requires the PyTorch library but it was not found in your environment. Checkout the instructions on the
installation page: https://pytorch.org/get-started/locally/ and follow the ones that match your environment.
Please note that you may need to restart your runtime after installation.


In [None]:
# Write the fine-tuned weights and the tokenizer files into the same directory
# so both can later be reloaded from one path.
finetuned_dir = "./gpt2_finetuned_haiku"
for artifact in (model, tokenizer):
    artifact.save_pretrained(finetuned_dir)

<h2> Detector </h2>
<P>
poem_lines = [
    "Autumn leaves fall",  # 4 syllables (violation) <br>
    "Swirling in the cold wind",  # 6 syllables (violation)<br>
    "A silent forest"  # 5 syllables (correct)<br>
]


violations = [(0, 4), (1, 6)]  # Lines 0 and 1 violate the 5-7-5 pattern

In [2]:
from nltk.tokenize import word_tokenize
import syllapy

def detect_line_violations(poem_lines, target_syllables=(5, 7, 5), count_syllables=None):
    """Report the lines of a poem whose syllable count breaks the target pattern.

    Args:
        poem_lines: Sequence of strings, one entry per line of the poem.
        target_syllables: Expected syllable count for each line, in order.
            Defaults to the haiku pattern 5-7-5.
        count_syllables: Optional callable mapping a single word to its
            syllable count. Defaults to ``syllapy.count``.

    Returns:
        A list of ``(line_index, syllable_count)`` tuples, one per offending
        line, in line order. A line missing from ``poem_lines`` is reported
        with a count of 0; a line beyond the end of ``target_syllables`` is
        always reported, since the pattern has no slot for it.
    """
    import re  # function-scope import keeps this definition self-contained

    if count_syllables is None:
        count_syllables = syllapy.count

    # Extract words directly instead of using NLTK's word_tokenize, which emits
    # punctuation as separate tokens and splits contractions ("don't" ->
    # "do" + "n't"); both can add spurious syllables to the total.
    # The pattern matches runs of letters, optionally joined by apostrophes.
    word_pattern = re.compile(r"[^\W\d_]+(?:['’][^\W\d_]+)*")

    violations = []

    # Walk the longer of the two sequences so both missing and surplus lines
    # are surfaced instead of raising IndexError or being silently skipped.
    for i in range(max(len(poem_lines), len(target_syllables))):
        line = poem_lines[i] if i < len(poem_lines) else ""
        syllable_count = sum(count_syllables(word) for word in word_pattern.findall(line))
        expected = target_syllables[i] if i < len(target_syllables) else None
        if syllable_count != expected:
            violations.append((i, syllable_count))  # Record line index and its syllable count

    return violations


ModuleNotFoundError: No module named 'nltk'

<h2> Prompter </h2>

In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

def generate_haiku_suggestion(poem_context, target_syllables, model, tokenizer,
                              num_suggestions=3, max_new_tokens=20):
    """Sample candidate continuations for a haiku from a causal language model.

    Args:
        poem_context: Text of the poem so far; embedded verbatim in the prompt.
        target_syllables: Syllable count to request; embedded in the prompt.
        model: Object exposing a Hugging Face style ``generate`` method.
        tokenizer: Matching tokenizer; called on the prompt and used to decode.
        num_suggestions: How many continuations to sample (default 3).
        max_new_tokens: Token budget for each continuation, excluding the prompt.

    Returns:
        A list of ``num_suggestions`` strings holding only the newly generated
        text, with the prompt removed and surrounding whitespace stripped.
    """
    prompt = f"Continue this haiku: {poem_context} (Target syllable count: {target_syllables})"

    # Calling the tokenizer (rather than .encode) also yields the attention mask.
    encoded = tokenizer(prompt, return_tensors='pt')
    input_ids = encoded["input_ids"]
    prompt_length = len(input_ids[0])

    outputs = model.generate(
        input_ids,
        attention_mask=encoded["attention_mask"],
        # Sampling must be enabled explicitly: greedy decoding rejects
        # num_return_sequences > 1 and ignores temperature / top_k.
        do_sample=True,
        temperature=0.7,
        top_k=50,
        # Budget only the continuation; max_length would also count the prompt
        # tokens and leave little or no room for long contexts.
        max_new_tokens=max_new_tokens,
        num_return_sequences=num_suggestions,
        # GPT-2 defines no pad token; EOS is the conventional stand-in.
        pad_token_id=tokenizer.eos_token_id,
    )

    # Each returned sequence starts with the prompt tokens; drop them so the
    # caller receives the suggestion rather than an echo of the instructions.
    suggestions = [
        tokenizer.decode(output[prompt_length:], skip_special_tokens=True).strip()
        for output in outputs
    ]
    return suggestions


<h2> Haiku Generation </h2>

In [None]:
# Target syllable count for each line of a haiku, indexed by line number.
HAIKU_PATTERN = (5, 7, 5)

poem = "Autumn leaves fall, swirling in the cold wind, a silent forest."
poem_lines = poem.split(", ")

# Detect violations
violations = detect_line_violations(poem_lines)

# If there are violations, prompt for replacements
for line_idx, syllable_count in violations:
    # A line index outside the three-line pattern has no target to aim for.
    if line_idx >= len(HAIKU_PATTERN):
        continue
    # Look the target up per line: the first and third lines want 5 syllables,
    # only the middle line wants 7.
    target_syllables = HAIKU_PATTERN[line_idx]
    suggestions = generate_haiku_suggestion(poem, target_syllables, model, tokenizer)
    print(f"Suggestions for line {line_idx}: {suggestions}")
    