In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import csv
import time

In [2]:
# Load the Dataset
DATA_PATH = "/kaggle/input/poem-100/poems-100 - poems-100.csv"

# newline="" lets the csv module handle line endings (including newlines inside
# quoted fields) itself, as the csv documentation recommends.
# NOTE(review): assumes the CSV is UTF-8 encoded; the explicit encoding removes the
# dependency on the platform default.
with open(DATA_PATH, "r", newline="", encoding="utf-8") as file:
    # Combine all cells of all rows into a single text. Every row is followed by a
    # space so the last word of one row never fuses with the first word of the next.
    # Building the string with join avoids repeated string concatenation in a loop.
    text = "".join(" ".join(row) + " " for row in csv.reader(file))

In [3]:
# Tokenize the Text into Words
# str.split() with no arguments splits on runs of whitespace only, so punctuation
# and capitalization stay attached to each token (e.g. "day," and "day" end up as
# different vocabulary entries).
tokens = text.split()

In [4]:
# Build the Vocabulary: Word <-> Index Lookup Tables
# dict.fromkeys keeps only the first occurrence of each token, in corpus order,
# so indices are assigned in order of first appearance (0, 1, 2, ...).
word_to_idx = {word: idx for idx, word in enumerate(dict.fromkeys(tokens))}
idx_to_word = {idx: word for word, idx in word_to_idx.items()}
vocab_size = len(word_to_idx)

In [5]:
# Encode the Corpus: replace every token by its vocabulary index
token_indices = [word_to_idx[token] for token in tokens]

In [6]:
# Build Sliding-Window Training Pairs
# Each input is a window of seq_length consecutive token indices and its target
# is the index of the token that immediately follows the window.
seq_length = 10
num_windows = len(token_indices) - seq_length

sequences = [token_indices[start:start + seq_length] for start in range(num_windows)]
targets = [token_indices[start + seq_length] for start in range(num_windows)]

In [7]:
# Turn the Python lists into PyTorch tensors of integer (int64) indices
# sequences -> (num_windows, seq_length), targets -> (num_windows,)
index_dtype = torch.long
sequences = torch.tensor(sequences, dtype=index_dtype)
targets = torch.tensor(targets, dtype=index_dtype)

In [8]:
# Vanilla RNN model fed with one-hot encoded words
class OneHotRNN(nn.Module):
    """Next-word classifier: one-hot inputs -> nn.RNN -> linear layer."""

    def __init__(self, vocab_size, hidden_dim, output_dim):
        """
        Args:
            vocab_size: Size of each one-hot input vector.
            hidden_dim: Size of the RNN hidden state.
            output_dim: Number of output logits.
        """
        super().__init__()
        self.rnn = nn.RNN(input_size=vocab_size, hidden_size=hidden_dim, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Map a (batch, seq_len, vocab_size) float tensor to (batch, output_dim) logits."""
        step_outputs, _ = self.rnn(x)
        # Only the output of the final time step is used for the prediction.
        final_step = step_outputs[:, -1, :]
        return self.fc(final_step)

In [9]:
# LSTM model fed with one-hot encoded words
class OneHotLSTM(nn.Module):
    """Next-word classifier: one-hot inputs -> nn.LSTM -> linear layer."""

    def __init__(self, vocab_size, hidden_dim, output_dim):
        """
        Args:
            vocab_size: Size of each one-hot input vector.
            hidden_dim: Size of the LSTM hidden state.
            output_dim: Number of output logits.
        """
        super().__init__()
        self.lstm = nn.LSTM(input_size=vocab_size, hidden_size=hidden_dim, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Map a (batch, seq_len, vocab_size) float tensor to (batch, output_dim) logits."""
        step_outputs, _ = self.lstm(x)
        # Only the output of the final time step is used for the prediction.
        final_step = step_outputs[:, -1, :]
        return self.fc(final_step)

In [10]:
# Vanilla RNN model on top of a learned word embedding
class EmbeddingRNN(nn.Module):
    """Next-word classifier: word indices -> nn.Embedding -> nn.RNN -> linear layer."""

    def __init__(self, vocab_size, embed_dim, hidden_dim, output_dim):
        """
        Args:
            vocab_size: Number of rows in the embedding table.
            embed_dim: Size of each embedding vector.
            hidden_dim: Size of the RNN hidden state.
            output_dim: Number of output logits.
        """
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embed_dim)
        self.rnn = nn.RNN(input_size=embed_dim, hidden_size=hidden_dim, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Map a (batch, seq_len) tensor of word indices to (batch, output_dim) logits."""
        embedded = self.embedding(x)
        step_outputs, _ = self.rnn(embedded)
        # Only the output of the final time step is used for the prediction.
        final_step = step_outputs[:, -1, :]
        return self.fc(final_step)

In [11]:
# LSTM model on top of a learned word embedding
class EmbeddingLSTM(nn.Module):
    """Next-word classifier: word indices -> nn.Embedding -> nn.LSTM -> linear layer."""

    def __init__(self, vocab_size, embed_dim, hidden_dim, output_dim):
        """
        Args:
            vocab_size: Number of rows in the embedding table.
            embed_dim: Size of each embedding vector.
            hidden_dim: Size of the LSTM hidden state.
            output_dim: Number of output logits.
        """
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embed_dim)
        self.lstm = nn.LSTM(input_size=embed_dim, hidden_size=hidden_dim, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Map a (batch, seq_len) tensor of word indices to (batch, output_dim) logits."""
        embedded = self.embedding(x)
        step_outputs, _ = self.lstm(embedded)
        # Only the output of the final time step is used for the prediction.
        final_step = step_outputs[:, -1, :]
        return self.fc(final_step)

In [12]:
# Model hyperparameters
embed_dim, hidden_dim = 100, 128  # embedding vector size, recurrent hidden-state size
output_dim = vocab_size           # one output logit per vocabulary word

In [13]:
# Instantiate the four models (same construction order: one-hot RNN, one-hot LSTM,
# embedding RNN, embedding LSTM)
onehot_args = (vocab_size, hidden_dim, output_dim)
embedding_args = (vocab_size, embed_dim, hidden_dim, output_dim)

onehot_rnn = OneHotRNN(*onehot_args)
onehot_lstm = OneHotLSTM(*onehot_args)
embedding_rnn = EmbeddingRNN(*embedding_args)
embedding_lstm = EmbeddingLSTM(*embedding_args)

In [14]:
# Shared loss function and one Adam optimizer per model
criterion = nn.CrossEntropyLoss()

learning_rate = 0.001
onehot_rnn_optimizer = optim.Adam(onehot_rnn.parameters(), lr=learning_rate)
onehot_lstm_optimizer = optim.Adam(onehot_lstm.parameters(), lr=learning_rate)
embedding_rnn_optimizer = optim.Adam(embedding_rnn.parameters(), lr=learning_rate)
embedding_lstm_optimizer = optim.Adam(embedding_lstm.parameters(), lr=learning_rate)

In [15]:
# Per-epoch average loss history, one independent list per model
onehot_rnn_losses = []
onehot_lstm_losses = []
embedding_rnn_losses = []
embedding_lstm_losses = []

In [16]:
# Training Function with Tracking
def train_model(model, optimizer, name, num_epochs=70, batch_size=32):
    """Train ``model`` on the module-level ``sequences`` / ``targets`` tensors.

    Runs mini-batch training in dataset order (no shuffling), appends the average
    batch loss of every epoch to the module-level loss list selected by ``name``
    and prints per-epoch progress plus the total wall-clock training time.

    Args:
        model: Network called as ``model(batch)``; its output is passed to the
            module-level ``criterion`` together with the batch targets.
        optimizer: Optimizer whose ``zero_grad()`` / ``step()`` are called for
            every batch.
        name: Model label used in the printed messages. If it contains "OneHot",
            the index batches are one-hot encoded before the forward pass.
            "OneHotRNN", "OneHotLSTM" and "EmbeddingRNN" select their own loss
            lists; any other value logs to ``embedding_lstm_losses``.
        num_epochs: Number of passes over the data (default 70).
        batch_size: Number of sequences per mini-batch (default 32).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 or there is no training data.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")
    if len(sequences) == 0:
        raise ValueError("train_model requires at least one training sequence")

    # Resolve the loss history once; unknown names fall back to the EmbeddingLSTM list.
    loss_history = {
        "OneHotRNN": onehot_rnn_losses,
        "OneHotLSTM": onehot_lstm_losses,
        "EmbeddingRNN": embedding_rnn_losses,
    }.get(name, embedding_lstm_losses)
    uses_one_hot = "OneHot" in name

    # Make sure the model is in training mode in case it was switched to eval mode earlier.
    model.train()

    start_time = time.time()
    for epoch in range(num_epochs):
        total_loss = 0.0
        num_batches = 0
        for i in range(0, len(sequences), batch_size):
            batch_seq = sequences[i:i + batch_size]
            batch_target = targets[i:i + batch_size]

            # One-Hot Encoding for OneHot Models
            if uses_one_hot:
                batch_seq = F.one_hot(batch_seq, num_classes=vocab_size).float()

            # Forward Pass
            outputs = model(batch_seq)
            loss = criterion(outputs, batch_target)

            # Backward Pass and Optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            num_batches += 1

        # Divide by the number of batches actually processed. The final, possibly
        # smaller, batch counts too, so a floor division of the dataset size would
        # overstate the average (and divide by zero for tiny datasets).
        avg_loss = total_loss / num_batches
        loss_history.append(avg_loss)

        print(f"{name} Epoch [{epoch+1}/{num_epochs}], Avg Loss: {avg_loss:.4f}")
    print(f"{name} Training Time: {time.time() - start_time:.2f}s\n")

In [17]:
# Poem Generation Function
def generate_poem(model, seed_text, num_words=50, model_type="EmbeddingLSTM"):
    """Extend ``seed_text`` by sampling ``num_words`` words from ``model``.

    At every step the last ``seq_length`` words (fewer while the text is still
    shorter than that) are converted to indices, passed through the model, and
    the next word is sampled from the softmax distribution over the logits.

    Args:
        model: Trained network returning logits of shape (1, vocab_size).
        seed_text: Whitespace-separated starting words; must not be empty.
        num_words: Number of words to append (default 50).
        model_type: Model label; if it contains "OneHot" the index sequence is
            one-hot encoded before being passed to the model.

    Returns:
        The seed words followed by the generated words, joined by single spaces.

    Raises:
        ValueError: If ``seed_text`` contains no words.
    """
    words = seed_text.split()
    if not words:
        # An empty context would otherwise fail inside the recurrent layer.
        raise ValueError("seed_text must contain at least one word")

    uses_one_hot = "OneHot" in model_type

    # Remember the current mode so generation does not leave the model in eval
    # mode as a hidden side effect.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for _ in range(num_words):
                # NOTE: words missing from the vocabulary are mapped to index 0,
                # which is a regular vocabulary word, not a dedicated unknown token.
                seq = [word_to_idx.get(word, 0) for word in words[-seq_length:]]
                seq = torch.tensor(seq, dtype=torch.long).unsqueeze(0)

                if uses_one_hot:
                    seq = F.one_hot(seq, num_classes=vocab_size).float()

                output = model(seq)
                probabilities = F.softmax(output, dim=1)
                # Sample instead of taking the argmax so repeated calls give varied text.
                predicted_idx = torch.multinomial(probabilities, 1).item()
                words.append(idx_to_word[predicted_idx])
    finally:
        model.train(was_training)

    return " ".join(words)

In [18]:
# Train the One-Hot RNN model
train_model(model=onehot_rnn, optimizer=onehot_rnn_optimizer, name="OneHotRNN")

OneHotRNN Epoch [1/70], Avg Loss: 7.5795
OneHotRNN Epoch [2/70], Avg Loss: 6.6851
OneHotRNN Epoch [3/70], Avg Loss: 6.3195
OneHotRNN Epoch [4/70], Avg Loss: 6.0942
OneHotRNN Epoch [5/70], Avg Loss: 5.9369
OneHotRNN Epoch [6/70], Avg Loss: 5.7794
OneHotRNN Epoch [7/70], Avg Loss: 5.6184
OneHotRNN Epoch [8/70], Avg Loss: 5.4501
OneHotRNN Epoch [9/70], Avg Loss: 5.2783
OneHotRNN Epoch [10/70], Avg Loss: 5.0746
OneHotRNN Epoch [11/70], Avg Loss: 4.8594
OneHotRNN Epoch [12/70], Avg Loss: 4.6353
OneHotRNN Epoch [13/70], Avg Loss: 4.3697
OneHotRNN Epoch [14/70], Avg Loss: 4.0834
OneHotRNN Epoch [15/70], Avg Loss: 3.8099
OneHotRNN Epoch [16/70], Avg Loss: 3.5324
OneHotRNN Epoch [17/70], Avg Loss: 3.2308
OneHotRNN Epoch [18/70], Avg Loss: 2.9246
OneHotRNN Epoch [19/70], Avg Loss: 2.6163
OneHotRNN Epoch [20/70], Avg Loss: 2.3029
OneHotRNN Epoch [21/70], Avg Loss: 1.9947
OneHotRNN Epoch [22/70], Avg Loss: 1.7124
OneHotRNN Epoch [23/70], Avg Loss: 1.4489
OneHotRNN Epoch [24/70], Avg Loss: 1.2298
O

In [19]:
# Seed shared by all generation cells; sample a poem from the One-Hot RNN
seed_text = "O my Luve"
onehot_rnn_poem = generate_poem(onehot_rnn, seed_text, model_type="OneHotRNN")
print("\nGenerated Poem (OneHotRNN):", onehot_rnn_poem)


Generated Poem (OneHotRNN): O my Luve And of one while I felt the wind of my day and lips. My last peal the spring beauty, and she Ride, gone goes for every object When in the mouth, arms who And make for something of day, The blood. of flowing Under my eye? When will the wind


In [20]:
# Train the One-Hot LSTM model
train_model(model=onehot_lstm, optimizer=onehot_lstm_optimizer, name="OneHotLSTM")

OneHotLSTM Epoch [1/70], Avg Loss: 7.5810
OneHotLSTM Epoch [2/70], Avg Loss: 6.6967
OneHotLSTM Epoch [3/70], Avg Loss: 6.3477
OneHotLSTM Epoch [4/70], Avg Loss: 6.1494
OneHotLSTM Epoch [5/70], Avg Loss: 6.0195
OneHotLSTM Epoch [6/70], Avg Loss: 5.7739
OneHotLSTM Epoch [7/70], Avg Loss: 5.5545
OneHotLSTM Epoch [8/70], Avg Loss: 5.3069
OneHotLSTM Epoch [9/70], Avg Loss: 5.0388
OneHotLSTM Epoch [10/70], Avg Loss: 4.7891
OneHotLSTM Epoch [11/70], Avg Loss: 4.5427
OneHotLSTM Epoch [12/70], Avg Loss: 4.3147
OneHotLSTM Epoch [13/70], Avg Loss: 4.0733
OneHotLSTM Epoch [14/70], Avg Loss: 3.7760
OneHotLSTM Epoch [15/70], Avg Loss: 3.4339
OneHotLSTM Epoch [16/70], Avg Loss: 3.0609
OneHotLSTM Epoch [17/70], Avg Loss: 2.6577
OneHotLSTM Epoch [18/70], Avg Loss: 2.3469
OneHotLSTM Epoch [19/70], Avg Loss: 2.1408
OneHotLSTM Epoch [20/70], Avg Loss: 1.8252
OneHotLSTM Epoch [21/70], Avg Loss: 1.5195
OneHotLSTM Epoch [22/70], Avg Loss: 1.2210
OneHotLSTM Epoch [23/70], Avg Loss: 0.9860
OneHotLSTM Epoch [24

In [21]:
# Sample a poem from the One-Hot LSTM
onehot_lstm_poem = generate_poem(onehot_lstm, seed_text, model_type="OneHotLSTM")
print("\nGenerated Poem (OneHotLSTM):", onehot_lstm_poem)


Generated Poem (OneHotLSTM): O my Luve rest click left,) headstone foreign, missing, none more looking Thou dost smile, I sing the while, Sweet joy befall thee! The city had withdrawn into itself And left at last the country to the country; When between whirls of snow not come to lie And whirls of foliage not yet


In [22]:
# Train the Embedding RNN model
train_model(model=embedding_rnn, optimizer=embedding_rnn_optimizer, name="EmbeddingRNN")

EmbeddingRNN Epoch [1/70], Avg Loss: 7.5929
EmbeddingRNN Epoch [2/70], Avg Loss: 6.3903
EmbeddingRNN Epoch [3/70], Avg Loss: 5.7654
EmbeddingRNN Epoch [4/70], Avg Loss: 5.2057
EmbeddingRNN Epoch [5/70], Avg Loss: 4.7298
EmbeddingRNN Epoch [6/70], Avg Loss: 4.2991
EmbeddingRNN Epoch [7/70], Avg Loss: 3.7787
EmbeddingRNN Epoch [8/70], Avg Loss: 3.2889
EmbeddingRNN Epoch [9/70], Avg Loss: 2.9054
EmbeddingRNN Epoch [10/70], Avg Loss: 2.5815
EmbeddingRNN Epoch [11/70], Avg Loss: 2.2945
EmbeddingRNN Epoch [12/70], Avg Loss: 2.0746
EmbeddingRNN Epoch [13/70], Avg Loss: 1.9019
EmbeddingRNN Epoch [14/70], Avg Loss: 1.6860
EmbeddingRNN Epoch [15/70], Avg Loss: 1.4679
EmbeddingRNN Epoch [16/70], Avg Loss: 1.2722
EmbeddingRNN Epoch [17/70], Avg Loss: 1.1039
EmbeddingRNN Epoch [18/70], Avg Loss: 0.9583
EmbeddingRNN Epoch [19/70], Avg Loss: 0.8286
EmbeddingRNN Epoch [20/70], Avg Loss: 0.7190
EmbeddingRNN Epoch [21/70], Avg Loss: 0.6269
EmbeddingRNN Epoch [22/70], Avg Loss: 0.5413
EmbeddingRNN Epoch 

In [23]:
# Sample a poem from the Embedding RNN
embedding_rnn_poem = generate_poem(embedding_rnn, seed_text, model_type="EmbeddingRNN")
print("\nGenerated Poem (EmbeddingRNN):", embedding_rnn_poem)


Generated Poem (EmbeddingRNN): O my Luve and Oh, I can my good will, Scattering it freely forever. The pure contralto sings in the organ loft, The carpenter dresses his plank, the tongue of his foreplane whistles its wild ascending lisp, The married and unmarried children ride home to their Thanksgiving dinner, The pilot seizes the king-pin,


In [24]:
# Train the Embedding LSTM model
train_model(model=embedding_lstm, optimizer=embedding_lstm_optimizer, name="EmbeddingLSTM")

EmbeddingLSTM Epoch [1/70], Avg Loss: 7.5631
EmbeddingLSTM Epoch [2/70], Avg Loss: 6.5433
EmbeddingLSTM Epoch [3/70], Avg Loss: 5.9872
EmbeddingLSTM Epoch [4/70], Avg Loss: 5.4844
EmbeddingLSTM Epoch [5/70], Avg Loss: 4.9963
EmbeddingLSTM Epoch [6/70], Avg Loss: 4.5167
EmbeddingLSTM Epoch [7/70], Avg Loss: 4.1496
EmbeddingLSTM Epoch [8/70], Avg Loss: 3.7315
EmbeddingLSTM Epoch [9/70], Avg Loss: 3.2990
EmbeddingLSTM Epoch [10/70], Avg Loss: 2.9829
EmbeddingLSTM Epoch [11/70], Avg Loss: 2.6195
EmbeddingLSTM Epoch [12/70], Avg Loss: 2.2721
EmbeddingLSTM Epoch [13/70], Avg Loss: 1.9076
EmbeddingLSTM Epoch [14/70], Avg Loss: 1.5767
EmbeddingLSTM Epoch [15/70], Avg Loss: 1.2990
EmbeddingLSTM Epoch [16/70], Avg Loss: 1.0700
EmbeddingLSTM Epoch [17/70], Avg Loss: 0.8783
EmbeddingLSTM Epoch [18/70], Avg Loss: 0.7216
EmbeddingLSTM Epoch [19/70], Avg Loss: 0.6034
EmbeddingLSTM Epoch [20/70], Avg Loss: 0.4990
EmbeddingLSTM Epoch [21/70], Avg Loss: 0.4168
EmbeddingLSTM Epoch [22/70], Avg Loss: 0.35

In [25]:
# Sample a poem from the Embedding LSTM
embedding_lstm_poem = generate_poem(embedding_lstm, seed_text, model_type="EmbeddingLSTM")
print("\nGenerated Poem (EmbeddingLSTM):", embedding_lstm_poem)


Generated Poem (EmbeddingLSTM): O my Luve and space, And that is enough, Through the past is the clock came out for good strong I rock It was not lost listening to you, or whole Or I guess it is a uniform hieroglyphic, And it means, Sprouting alike in broad zones and narrow zones, Growing among black
