<a href="https://colab.research.google.com/github/Kanth-Somala/AI-Haiku-Poem-Generator/blob/main/AI_Genrated_Haiku_Poems.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers datasets accelerate --quiet

In [None]:
# Path of the haiku corpus that this notebook reads
file_path = '/content/lines.txt'

# Read the whole corpus into memory as a single string
with open(file_path, 'r', encoding='utf-8') as f:
    raw_text = f.read()

# Entries are separated by '$'; entries that are only whitespace are dropped
raw_haikus = []
for entry in raw_text.split('$'):
    entry = entry.strip()
    if entry:
        raw_haikus.append(entry)

# Inside an entry, '/' separates the lines. Only entries that yield exactly
# three non-empty lines are kept, re-joined with newlines (3-line format).
haiku_pieces = (
    [piece.strip() for piece in entry.split('/') if piece.strip()]
    for entry in raw_haikus
)
formatted_haikus = ['\n'.join(pieces) for pieces in haiku_pieces if len(pieces) == 3]


In [None]:
from datasets import Dataset
# Wrap the formatted haikus in a Dataset with a single "text" column,
# one row per haiku.
dataset = Dataset.from_dict({"text": formatted_haikus})


In [None]:
from transformers import AutoTokenizer

# Load the tokenizer published under the "gpt2" checkpoint name.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
# Reuse the end-of-sequence token as the padding token so that the
# padding="max_length" call in tokenize_function has a token to pad with.
tokenizer.pad_token = tokenizer.eos_token

def tokenize_function(example):
    """Tokenize haiku text to a fixed length and build causal-LM labels.

    The text is padded/truncated to 50 tokens. Labels start as a copy of
    ``input_ids``, but padding positions are replaced with -100 so the loss
    is not dominated by predicting pad after pad. Because this notebook sets
    the pad token to the EOS token, the first padding position is kept as a
    target: it is the EOS that directly follows the haiku, which is what
    teaches the model to stop generating.

    Args:
        example: Mapping with a "text" entry. Either a single string (the
            way ``dataset.map`` calls this function in this notebook) or a
            list of strings (``batched=True``).

    Returns:
        The tokenizer output with an added "labels" entry shaped like
        "input_ids".
    """
    tokens = tokenizer(example["text"], padding="max_length", truncation=True, max_length=50)

    def _labels_for(input_ids, attention_mask):
        # Keep real tokens plus the single pad/EOS right after the last real
        # token; every other padding position is ignored by the loss (-100).
        labels = []
        previous_was_real = False
        for token_id, is_real in zip(input_ids, attention_mask):
            labels.append(token_id if (is_real or previous_was_real) else -100)
            previous_was_real = bool(is_real)
        return labels

    ids = tokens["input_ids"]
    mask = tokens.get("attention_mask")
    if mask is None:
        # No mask available: fall back to plain copies of the ids.
        batched = bool(ids) and isinstance(ids[0], list)
        tokens["labels"] = [list(row) for row in ids] if batched else list(ids)
    elif ids and isinstance(ids[0], list):
        # Batched call: one id/mask row per example.
        tokens["labels"] = [_labels_for(row, row_mask) for row, row_mask in zip(ids, mask)]
    else:
        tokens["labels"] = _labels_for(ids, mask)
    return tokens

# Tokenize every row of the dataset. map() is called without batched=True,
# so tokenize_function presumably receives one example at a time
# (NOTE(review): relies on the datasets default -- confirm).
tokenized_dataset = dataset.map(tokenize_function)


In [None]:
from transformers import AutoModelForCausalLM, TrainingArguments, Trainer

# Start from the pretrained "gpt2" causal language model.
model = AutoModelForCausalLM.from_pretrained("gpt2")
# Match the embedding table to the tokenizer's vocabulary size.
# NOTE(review): no tokens are added to the tokenizer in the visible cells, so
# this presumably leaves the size unchanged -- confirm.
model.resize_token_embeddings(len(tokenizer))

# Fine-tuning configuration: 5 epochs, 4 examples per device per step.
training_args = TrainingArguments(
    # Directory the Trainer writes its output to.
    output_dir="./haiku-gpt2",
    per_device_train_batch_size=4,
    num_train_epochs=5,
    logging_steps=10,
    # Save once per epoch.
    save_strategy="epoch",
    # Mixed-precision (fp16) training is explicitly turned off.
    fp16=False,
    # No reporting integration is requested.
    report_to="none"
)

# Only a training set is supplied: no evaluation dataset and no custom data
# collator are passed, so the Trainer uses its defaults for both.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)


In [None]:
# Run fine-tuning with the Trainer configured in the previous cell.
trainer.train()

In [None]:
# Save the model and the tokenizer into the same directory; the generation
# cells pass this path to pipeline() for both the model and the tokenizer.
model.save_pretrained("./haiku-gpt2-finetuned")
tokenizer.save_pretrained("./haiku-gpt2-finetuned")

In [None]:
from transformers import pipeline

# Build a text-generation pipeline from the saved fine-tuned directory
# (same path for model and tokenizer).
generator = pipeline(
    "text-generation",
    model="./haiku-gpt2-finetuned",
    tokenizer="./haiku-gpt2-finetuned",
)

# Fixed demo prompt; sample three continuations of up to 40 new tokens each.
prompt = "nature is awsome"
outputs = generator(
    prompt,
    do_sample=True,
    top_k=50,
    max_new_tokens=40,
    num_return_sequences=3,
)

# Print every sample under a 1-based "Haiku N" heading.
for number, sample in enumerate(outputs, start=1):
    print(f"\nHaiku {number}:\n{sample['generated_text']}")


In [None]:
!pip install syllables --quiet


In [None]:
import syllables
from transformers import pipeline

# Ask the user for the text that seeds generation.
prompt = input("Enter a haiku theme: ")

# Load the fine-tuned model and tokenizer from the directory written by the
# save_pretrained() cell.
generator = pipeline(
    "text-generation",
    model="./haiku-gpt2-finetuned",
    tokenizer="./haiku-gpt2-finetuned"
)

# Sample 100 continuations of the prompt in one call. The filtering loop later
# in this cell keeps at most 3 that pass the 5-7-5 check, so over-generating
# gives that filter more candidates to choose from.
outputs = generator(
    prompt,
    max_new_tokens=40,
    num_return_sequences=100,
    do_sample=True,
    top_k=50
)

def count_syllables(line):
    """Return the estimated syllable total of a line.

    The line is split on whitespace and ``syllables.estimate`` is applied to
    each piece; the per-piece estimates are added up. A line with no words
    yields 0.
    """
    total = 0
    for token in line.split():
        total += syllables.estimate(token)
    return total

def is_valid_575(lines):
    """Tell whether ``lines`` is a three-line haiku with 5, 7 and 5 syllables.

    Returns False right away when there are not exactly three lines;
    otherwise each line's ``count_syllables`` result is compared, in order,
    against 5, 7 and 5, stopping at the first mismatch.
    """
    if len(lines) != 3:
        return False
    expected_counts = (5, 7, 5)
    return all(
        count_syllables(text) == expected
        for text, expected in zip(lines, expected_counts)
    )

def extract_haiku(text):
    """Extract a 5-7-5 haiku from generated text, or return None.

    Two strategies are tried in order:

    1. Line-based: split ``text`` on newlines, drop blank lines, and accept
       the first three lines if they pass ``is_valid_575``. Looking only at
       the first three lines means a generation that kept going after the
       haiku is not rejected just for having extra lines.
    2. Syllable-based regrouping: ignore the line breaks and walk the words
       in order, closing a line as soon as its running syllable total hits
       the target (5, then 7, then 5). If a word overshoots a target, no
       5-7-5 split of this word sequence exists and None is returned. Words
       left over after the third line are ignored.

    Args:
        text: Generated text (str).

    Returns:
        A list of three strings (the haiku lines), or None when neither
        strategy produces a 5-7-5 haiku.
    """
    lines = [line.strip() for line in text.split('\n') if line.strip()]
    first_three = lines[:3]
    if is_valid_575(first_three):
        return first_three

    # Regroup by syllable totals rather than by word counts: a fixed 5/7/5
    # word split can only succeed when every word has exactly one syllable.
    targets = (5, 7, 5)
    regrouped = []
    current_words = []
    running_total = 0
    for word in text.split():
        current_words.append(word)
        running_total += count_syllables(word)
        target = targets[len(regrouped)]
        if running_total > target:
            # This word straddles a line boundary; no valid split exists.
            return None
        if running_total == target:
            regrouped.append(" ".join(current_words))
            if len(regrouped) == len(targets):
                return regrouped
            current_words = []
            running_total = 0
    # Ran out of words before completing three lines.
    return None

# Walk the generated candidates in order and keep the first three that
# extract_haiku accepts; stop scanning as soon as the third one is found.
valid_haikus = []
for candidate in outputs:
    haiku = extract_haiku(candidate["generated_text"])
    if not haiku:
        continue
    valid_haikus.append(haiku)
    if len(valid_haikus) >= 3:
        break

# Report: either a failure notice, or each kept haiku under a numbered
# heading with its lines printed one per row.
if not valid_haikus:
    print("\n❌ No valid 5-7-5 haikus were found. Try another theme or generate more.")
else:
    print("\n Haikus Generated:")
    for idx, haiku in enumerate(valid_haikus, 1):
        print(f"\n🌿 Haiku {idx}:")
        print(*haiku, sep="\n")


In [None]:
# Spot-check the syllable estimator on a run-together, non-dictionary string.
# The value is not stored; as the last expression of the cell it is only shown
# as the cell's output.
syllables.estimate('becarefulfolks')

In [None]:
!pip install nbstripout
!nbstripout AI Genrated Haiku Poems.ipynb
