## Read the Data

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import re
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, LayerNormalization, MultiHeadAttention
from tensorflow.keras.optimizers import Adam

my_data = pd.read_csv("/kaggle/input/modern-renaissance-poetry/all.csv")
my_data.head()

Unnamed: 0,author,content,poem name,age,type
0,WILLIAM SHAKESPEARE,Let the bird of loudest lay\r\nOn the sole Ara...,The Phoenix and the Turtle,Renaissance,Mythology & Folklore
1,DUCHESS OF NEWCASTLE MARGARET CAVENDISH,"Sir Charles into my chamber coming in,\r\nWhen...",An Epilogue to the Above,Renaissance,Mythology & Folklore
2,THOMAS BASTARD,"Our vice runs beyond all that old men saw,\r\n...","Book 7, Epigram 42",Renaissance,Mythology & Folklore
3,EDMUND SPENSER,"Lo I the man, whose Muse whilome did maske,\r\...","from The Faerie Queene: Book I, Canto I",Renaissance,Mythology & Folklore
4,RICHARD BARNFIELD,"Long have I longd to see my love againe,\r\nSt...",Sonnet 16,Renaissance,Mythology & Folklore


In [2]:
my_data.shape

(573, 5)

In [3]:
my_data['type'].unique()

array(['Mythology & Folklore', 'Nature', 'Love'], dtype=object)

In [4]:
# extract love poems as a list
love_poems = my_data.loc[my_data['type'] == 'Love']['content']
love_poems = list(love_poems)

love_poems[0], len(love_poems)

('Why didst thou promise such a beauteous day,\r\nAnd make me travel forth without my cloak,\r\nTo let base clouds oertake me in my way,\r\nHiding thy bravery in their rotten smoke?\r\nTis not enough that through the cloud thou break,\r\nTo dry the rain on my storm-beaten face,\r\nFor no man well of such a salve can speak\r\nThat heals the wound and cures not the disgrace:\r\nNor can thy shame give physic to my grief;\r\nThough thou repent, yet I have still the loss:\r\nThe offenders sorrow lends but weak relief\r\nTo him that bears the strong offences cross.\r\n   Ah! but those tears are pearl which thy love sheds,\r\n   And they are rich and ransom all ill deeds.\r\n \r\n \r\n ',
 326)

In [5]:
poems = love_poems
print(poems[11])

Joy of my life, full oft for loving you
    I bless my lot, that was so lucky placed:
    But then the more your own mishap I rue,
    That are so much by so mean love embased.
For had the equal heavens so much you graced
    In this as in the rest, ye might invent
    Some heavenly wit, whose verse could have enchased
    Your glorious name in golden monument.
But since ye deignd so goodly to relent
    To me your thrall, in whom is little worth,
    That little that I am shall all be spent
    In setting your immortal praises forth;
Whose lofty argument uplifting me
    Shall lift you up unto an high degree.


## Preprocessing the Data

In [6]:
def preprocess_text(text):
#     text = re.sub(r"[^\w\s]", "", text).lower()
    text = ' '.join(text.split()).lower()
    return text

cleaned_poems = [preprocess_text(poem) for poem in poems]
cleaned_poems[11]

'joy of my life, full oft for loving you i bless my lot, that was so lucky placed: but then the more your own mishap i rue, that are so much by so mean love embased. for had the equal heavens so much you graced in this as in the rest, ye might invent some heavenly wit, whose verse could have enchased your glorious name in golden monument. but since ye deignd so goodly to relent to me your thrall, in whom is little worth, that little that i am shall all be spent in setting your immortal praises forth; whose lofty argument uplifting me shall lift you up unto an high degree.'

In [7]:
# Create vocabulary using word-level Tokenization
tokenizer = Tokenizer()
tokenizer.fit_on_texts(cleaned_poems)
vocab_size = len(tokenizer.word_index) + 1

vocab_size

7231

Initially, I am directly converting the poems into sequences.
For each, sequence

predictor = complete poem

label = start from 2nd word  

Using padding both are made same, but label is one word ahead.

Now, I will try to use the way I used in LSTM notebook. Create multiple sequences from one poem, label would be single word (next word in the poem, not complete poem from next word like this).

In [8]:
# Create input sequences
input_sequences = []

for poem in cleaned_poems:
    # numerical representation of poem
    token_list = tokenizer.texts_to_sequences([poem])[0]
    
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        input_sequences.append(n_gram_sequence)

input_sequences[0:10]

[[141, 1000],
 [141, 1000, 28],
 [141, 1000, 28, 865],
 [141, 1000, 28, 865, 63],
 [141, 1000, 28, 865, 63, 9],
 [141, 1000, 28, 865, 63, 9, 676],
 [141, 1000, 28, 865, 63, 9, 676, 112],
 [141, 1000, 28, 865, 63, 9, 676, 112, 1],
 [141, 1000, 28, 865, 63, 9, 676, 112, 1, 76],
 [141, 1000, 28, 865, 63, 9, 676, 112, 1, 76, 15]]

In [9]:
# Pad sequences
max_seq_length = max([len(seq) for seq in input_sequences])
print("Longest sequence is: ", max_seq_length)

padded_sequences = np.array(pad_sequences(input_sequences, maxlen=max_seq_length, padding='pre'))

padded_sequences[0:3]

Longest sequence is:  2229


array([[   0,    0,    0, ...,    0,  141, 1000],
       [   0,    0,    0, ...,  141, 1000,   28],
       [   0,    0,    0, ..., 1000,   28,  865]], dtype=int32)

In [10]:
# Create predictors and label
X, y = padded_sequences[:, :-1], padded_sequences[:, 1:]


X[0], y[0]
# Convert y to categorical
# y = tf.keras.utils.to_categorical(y, num_classes=vocab_size)

(array([  0,   0,   0, ...,   0,   0, 141], dtype=int32),
 array([   0,    0,    0, ...,    0,  141, 1000], dtype=int32))

In [11]:
X[5].shape

(2228,)

## Create Model

Transformers architecture

In [12]:
class TransformerBlock(tf.keras.layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([Dense(ff_dim, activation="relu"), Dense(embed_dim), ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)
        
    def call(self, inputs, training):
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)  

In [13]:
# Model architecture
class PoemGenerator(tf.keras.Model):
    def __init__(self, vocab_size, embed_dim, num_heads, ff_dim):
        super(PoemGenerator, self).__init__()
        self.embedding = Embedding(vocab_size, embed_dim)
        self.transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
        self.final_layer = Dense(vocab_size)
    
    def call(self, inputs, training=False):
        x = self.embedding(inputs)
        x = self.transformer_block(x, training=training)
        return self.final_layer(x)

In [14]:
# Compile the model
embedding_dim = 64

model = PoemGenerator(vocab_size, embed_dim=embedding_dim, num_heads=2, ff_dim=32)
model.compile(optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))

## Training

In [15]:
history = model.fit(X, y, epochs=50, batch_size=64, validation_split=0.2)

Epoch 1/50


I0000 00:00:1725188469.467115      66 service.cc:145] XLA service 0x7ec154011ea0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1725188469.467160      66 service.cc:153]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
W0000 00:00:1725188469.838625      66 assert_op.cc:38] Ignoring Assert operator compile_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/Assert/Assert
I0000 00:00:1725188485.472408      66 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m603/604[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 302ms/step - loss: 2.0711

W0000 00:00:1725188667.900800      65 assert_op.cc:38] Ignoring Assert operator compile_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/Assert/Assert


[1m604/604[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m235s[0m 357ms/step - loss: 2.0669 - val_loss: 0.7483
Epoch 2/50
[1m604/604[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m199s[0m 330ms/step - loss: 0.2528 - val_loss: 0.7900
Epoch 3/50
[1m604/604[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m199s[0m 330ms/step - loss: 0.2114 - val_loss: 0.8272
Epoch 4/50
[1m604/604[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 330ms/step - loss: 0.1863 - val_loss: 0.8696
Epoch 5/50
[1m604/604[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 330ms/step - loss: 0.1685 - val_loss: 0.9138
Epoch 6/50
[1m604/604[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m199s[0m 330ms/step - loss: 0.1546 - val_loss: 0.9589
Epoch 7/50
[1m604/604[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 330ms/step - loss: 0.1436 - val_loss: 1.0091
Epoch 8/50
[1m604/604[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 330ms/step - loss: 0.1332 - val_loss: 1.0569
Epoch 9/50
[1m604/

## Generate Poem

In [16]:
def generate_poem(model, start_string, num_generate=20, temperature=1.0):
    input_eval = tokenizer.texts_to_sequences([start_string])[0]
    input_eval = pad_sequences([input_eval], maxlen=max_seq_length-1, padding='post')
    text_generated = []
    
    for _ in range(num_generate):
        predicted_id = 0
        while predicted_id == 0:   
            predictions = model(input_eval, training=False)
            predictions = tf.squeeze(predictions, 0)
            predictions = predictions / temperature
            predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()
        
        text_generated.append(tokenizer.index_word[predicted_id])
        
        input_eval = tf.concat([input_eval[:, 1:], [[predicted_id]]], axis=-1)
        
    return (start_string + ' ' + ' '.join(text_generated))

start_string = "love is"
print(generate_poem(model, start_string="love"))

love love love love that on love that sickness brags itself in her name so that these worlds false bonds in
