In [1]:
# Predict A Poem Using 100 Poem Dataset

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import csv

In [3]:
# Load the Dataset
text = ""
with open("/kaggle/input/poems-dataset/poems-100.csv", "r") as file:
    reader = csv.reader(file)
    for row in reader:
        text += " ".join(row) + " "                          # Combine All Lines into a Single Text

In [4]:
# Tokenize the Text into Words
tokens = text.split()

In [5]:
# Create a Dictionary to Map Words to Indices
word_to_idx = {}
idx_to_word = {}
vocab_size = 0

for word in tokens:
    if word not in word_to_idx:
        word_to_idx[word] = vocab_size
        idx_to_word[vocab_size] = word
        vocab_size += 1

# Convert Tokens to Indices
token_indices = [word_to_idx[word] for word in tokens]

In [6]:
# Create Sequences and Targets
seq_length = 10
sequences = []
targets = []

for i in range(len(token_indices) - seq_length):
    seq = token_indices[i:i + seq_length]
    target = token_indices[i + seq_length]
    sequences.append(seq)
    targets.append(target)

# Convert to PyTorch Tensors
sequences = torch.tensor(sequences, dtype = torch.long)
targets = torch.tensor(targets, dtype = torch.long)

In [7]:
class PoemRNN(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim, output_dim):
        super(PoemRNN, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.rnn = nn.RNN(embed_dim, hidden_dim, batch_first = True)
        self.fc = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        embedded = self.embedding(x)
        output, hidden = self.rnn(embedded)
        out = self.fc(output[:, -1, :])                    # Use the Last Hidden State for Prediction
        return out

# Hyperparameters
embed_dim = 100
hidden_dim = 128
output_dim = vocab_size

# Initialize the Model
model = PoemRNN(vocab_size, embed_dim, hidden_dim, output_dim)

In [8]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.001)

In [9]:
# Training loop
num_epochs = 250
batch_size = 32

for epoch in range(num_epochs):
    for i in range(0, len(sequences), batch_size):
        batch_seq = sequences[i:i + batch_size]
        batch_target = targets[i:i + batch_size]
        
        # Forward Pass
        outputs = model(batch_seq)
        loss = criterion(outputs, batch_target)
        
        # Backward Pass and Optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

Epoch [1/250], Loss: 7.1773
Epoch [2/250], Loss: 6.3784
Epoch [3/250], Loss: 5.5785
Epoch [4/250], Loss: 4.9980
Epoch [5/250], Loss: 4.5038
Epoch [6/250], Loss: 3.8939
Epoch [7/250], Loss: 3.3710
Epoch [8/250], Loss: 2.9164
Epoch [9/250], Loss: 2.4995
Epoch [10/250], Loss: 2.1696
Epoch [11/250], Loss: 1.9270
Epoch [12/250], Loss: 1.6605
Epoch [13/250], Loss: 1.5087
Epoch [14/250], Loss: 1.3465
Epoch [15/250], Loss: 1.2375
Epoch [16/250], Loss: 1.2320
Epoch [17/250], Loss: 1.0777
Epoch [18/250], Loss: 1.0685
Epoch [19/250], Loss: 0.7137
Epoch [20/250], Loss: 0.8408
Epoch [21/250], Loss: 0.7069
Epoch [22/250], Loss: 0.6931
Epoch [23/250], Loss: 0.5312
Epoch [24/250], Loss: 0.5305
Epoch [25/250], Loss: 0.3779
Epoch [26/250], Loss: 0.5130
Epoch [27/250], Loss: 0.5685
Epoch [28/250], Loss: 0.7442
Epoch [29/250], Loss: 0.6545
Epoch [30/250], Loss: 0.4889
Epoch [31/250], Loss: 0.2522
Epoch [32/250], Loss: 0.2165
Epoch [33/250], Loss: 0.1536
Epoch [34/250], Loss: 0.1722
Epoch [35/250], Loss: 0

In [10]:
def generate_poem(model, seed_text, num_words = 50):
    model.eval()
    words = seed_text.split()
    with torch.no_grad():
        for _ in range(num_words):
            # Get the Last `seq_length` Words
            seq = [word_to_idx.get(word, 0) for word in words[-seq_length:]]  # Use 0 for OOV Words
            seq = torch.tensor(seq, dtype = torch.long).unsqueeze(0)
            output = model(seq)

            # Apply Softmax
            probabilities = F.softmax(output, dim = 1)

            # Sample from the Probability Distribution
            predicted_idx = torch.multinomial(probabilities, 1).item()

            words.append(idx_to_word[predicted_idx])
            
    return " ".join(words)

# Generate a Poem
seed_text = "I wandered lonely as a"
generated_poem = generate_poem(model, seed_text, num_words = 50)
print(generated_poem)

I wandered lonely as a man, Stuff'd with the stuff that is coarse and stuff'd with the stuff that is fine, One of the Nation of many nations, the smallest the same and the largest the same, A Southerner soon as a Northerner, a planter nonchalant and hospitable down by the Oconee I live, A
