**Step 1: Import Libraries**

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random
import re
import pandas as pd
from torch.utils.data import Dataset, DataLoader


**Step 2:  Define the RNN Model**

In [2]:
class PoemRNN(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, num_layers):
        super(PoemRNN, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, hidden):
        x = self.embedding(x)
        out, hidden = self.lstm(x, hidden)
        out = self.fc(out[:, -1, :])  
        return out, hidden


**Step 3: Load and Preprocess Dataset**

In [3]:
# Load the dataset
dataset_path = "/kaggle/input/poems/poems-100.csv"
df = pd.read_csv(dataset_path)

# Check column names
print("Column names in the dataset:", df.columns)

# Preprocess text (tokenization, lowercasing, etc.)
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r"[^a-zA-Z0-9\s.,!?]", "", text)  # Keep basic punctuation
    return text.split()

# Ensure correct column name
correct_column_name = "text"  # Replace with actual column name from df.columns
df["tokens"] = df[correct_column_name].apply(preprocess_text)

# Display the first few rows with tokens
print(df.head())


Column names in the dataset: Index(['text'], dtype='object')
                                                text  \
0  O my Luve's like a red, red rose\nThat’s newly...   
1  The rose is red,\nThe violet's blue,\nSugar is...   
2  How do I love thee? Let me count the ways.\nI ...   
3  Had I the heavens' embroidered cloths,\nEnwrou...   
4  I.\n    Enough! we're tired, my heart and I.\n...   

                                              tokens  
0  [o, my, luves, like, a, red,, red, rose, thats...  
1  [the, rose, is, red,, the, violets, blue,, sug...  
2  [how, do, i, love, thee?, let, me, count, the,...  
3  [had, i, the, heavens, embroidered, cloths,, e...  
4  [i., enough!, were, tired,, my, heart, and, i....  


**Step 4: Prepare Vocabulary**

In [4]:
# Create vocabulary
all_words = [word for tokens in df["tokens"] for word in tokens]
unique_words = list(set(all_words))
word_vocab = {word: idx for idx, word in enumerate(unique_words)}
vocab_reverse = {idx: word for word, idx in word_vocab.items()}

# Vocabulary size
vocab_size = len(word_vocab)
print(f"Vocabulary size: {vocab_size}")

# Hyperparameters
embed_size = 256  
hidden_size = 512  
num_layers = 2  


Vocabulary size: 6725


**Step 5: Convert Dataset into Input-Target Sequences**

In [5]:
sequence_length = 5  

def create_sequences(tokens, seq_length):
    input_seqs, target_seqs = [], []
    for i in range(len(tokens) - seq_length):
        input_seqs.append([word_vocab[t] for t in tokens[i:i+seq_length]])
        target_seqs.append(word_vocab[tokens[i+seq_length]])
    return input_seqs, target_seqs

input_seqs, target_seqs = [], []
for tokens in df["tokens"]:
    inp, tgt = create_sequences(tokens, sequence_length)
    input_seqs.extend(inp)
    target_seqs.extend(tgt)

# Convert to PyTorch tensors
input_seqs = torch.tensor(input_seqs, dtype=torch.long)
target_seqs = torch.tensor(target_seqs, dtype=torch.long)


**Step 6: Define Dataset & DataLoader**

In [6]:
class PoemDataset(Dataset):
    def __init__(self, inputs, targets):
        self.inputs = inputs
        self.targets = targets

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        return self.inputs[idx], self.targets[idx]

batch_size = 64
dataset = PoemDataset(input_seqs, target_seqs)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)


**Step 7: Initialize Model & Optimizer**

In [7]:
model = PoemRNN(vocab_size, embed_size, hidden_size, num_layers)
model.cuda()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


**Step 8: Training Loop**

In [8]:
num_epochs = 200  

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0

    for inputs, targets in dataloader:
        inputs, targets = inputs.cuda(), targets.cuda()
        
        hidden = (
            torch.zeros(num_layers, inputs.size(0), hidden_size).cuda(),
            torch.zeros(num_layers, inputs.size(0), hidden_size).cuda(),
        )

        optimizer.zero_grad()
        outputs, hidden = model(inputs, hidden)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    avg_loss = epoch_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}")

# Save trained model
torch.save(model.state_dict(), "poem_rnn_200_epochs.pth")
print("Training complete. Model saved as 'poem_rnn_200_epochs.pth'.")


Epoch 1/200, Loss: 7.1256
Epoch 2/200, Loss: 6.4712
Epoch 3/200, Loss: 5.9175
Epoch 4/200, Loss: 5.0131
Epoch 5/200, Loss: 3.6924
Epoch 6/200, Loss: 2.1624
Epoch 7/200, Loss: 0.9200
Epoch 8/200, Loss: 0.2853
Epoch 9/200, Loss: 0.0872
Epoch 10/200, Loss: 0.0448
Epoch 11/200, Loss: 0.0308
Epoch 12/200, Loss: 0.0239
Epoch 13/200, Loss: 0.0206
Epoch 14/200, Loss: 0.0195
Epoch 15/200, Loss: 0.0185
Epoch 16/200, Loss: 0.0172
Epoch 17/200, Loss: 0.1359
Epoch 18/200, Loss: 0.6042
Epoch 19/200, Loss: 0.0530
Epoch 20/200, Loss: 0.0183
Epoch 21/200, Loss: 0.0138
Epoch 22/200, Loss: 0.0116
Epoch 23/200, Loss: 0.0114
Epoch 24/200, Loss: 0.0116
Epoch 25/200, Loss: 0.0108
Epoch 26/200, Loss: 0.0112
Epoch 27/200, Loss: 0.0116
Epoch 28/200, Loss: 0.0110
Epoch 29/200, Loss: 0.0113
Epoch 30/200, Loss: 0.0121
Epoch 31/200, Loss: 0.3204
Epoch 32/200, Loss: 0.2512
Epoch 33/200, Loss: 0.0247
Epoch 34/200, Loss: 0.0118
Epoch 35/200, Loss: 0.0109
Epoch 36/200, Loss: 0.0093
Epoch 37/200, Loss: 0.0094
Epoch 38/2

**Step 9: Poem Generation Function**

In [9]:
def generate_poem(start_words, length=100, temperature=0.8, top_k=5):
    model.eval()
    current_words = start_words
    result = [word.capitalize() for word in current_words]

    batch_size = 1
    hidden = (
        torch.zeros(num_layers, batch_size, hidden_size).cuda(),
        torch.zeros(num_layers, batch_size, hidden_size).cuda(),
    )

    for _ in range(length - len(start_words)):  
        input_tensor = torch.tensor([[word_vocab.get(current_words[-1], 0)]]).cuda()
        output, hidden = model(input_tensor, hidden)

        output = output / temperature
        probabilities = F.softmax(output, dim=1).squeeze()
        top_k_values, top_k_indices = torch.topk(probabilities, top_k)

        best_word_idx = top_k_indices[random.randint(0, len(top_k_indices) - 1)]
        next_word = vocab_reverse.get(best_word_idx.item(), "<UNK>")

        if next_word in ["<PAD>", "<UNK>"]:
            break

        result.append(next_word)
        current_words.append(next_word)

        # Add line breaks
        if len(result) % 8 == 0:
            result.append("\n")  

    poem = " ".join(result)
    poem = re.sub(r"\s+([.,!?])", r"\1", poem)  
    poem = re.sub(r"\n\s*", "\n", poem)  
    return poem


**Step 10: Generate & Display Poem**

In [10]:
start_words = ["she", "married"]
poem = generate_poem(start_words, length=50, temperature=0.8, top_k=5)

print("Generated Poem:\n", poem)


Generated Poem:
 She Married it may call the last i 
but not lovers the kind of these 
the song and it to within a 
roots not is no first or myself, 
there is myself, for the first but 
it is eternal part all how for 
those and wait they be are, they 

