In [1]:
import tensorflow as tf

import numpy as np
import os
import time

In [2]:
# Read the corpus as raw bytes and decode it explicitly as UTF-8.
# The `with` block closes the file handle as soon as the read finishes
# instead of leaving it open for the lifetime of the kernel.
with open('ValentinePoemData.txt', 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
# length of text is the number of characters in it
print(f'Length of text: {len(text)} characters')

Length of text: 1647133 characters


In [3]:
# Preview the first 250 characters of the corpus.
print(text[:250])

Happy Valentine’s Day to the most special person in my life.

I love you more than pizza.

Here's to being my emergency contact someday.

You're my everything. Happy Valentine's Day!

Are you a banana? Because I find you a-peel-ing.

Happy Valentine'


In [4]:
# The vocabulary is the sorted list of distinct characters found in the corpus.
vocab = sorted(set(text))
print(f'{len(vocab)} unique characters')

63 unique characters


In [5]:
# Two short strings used to demonstrate the character <-> id round trip.
example_texts = ['abcdefg', 'xyz']

# Split each string into its individual UTF-8 characters
# (the strings differ in length, so the result is a RaggedTensor).
chars = tf.strings.unicode_split(example_texts, input_encoding='UTF-8')
chars

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>

In [6]:
# Lookup layer built from the corpus vocabulary; calling it on characters
# yields their integer ids. No mask token is reserved.
ids_from_chars = tf.keras.layers.StringLookup(
    vocabulary=list(vocab), mask_token=None)

In [7]:
# Convert the example characters to their integer ids.
ids = ids_from_chars(chars)
ids

<tf.RaggedTensor [[36, 37, 38, 39, 40, 41, 42], [59, 60, 61]]>

In [8]:
# Inverse lookup (ids -> characters). It is built from the forward layer's
# get_vocabulary() so that both layers share the same vocabulary.
chars_from_ids = tf.keras.layers.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(), invert=True, mask_token=None)

In [9]:
# Map the example ids back to characters to confirm the round trip.
chars = chars_from_ids(ids)
chars

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>

In [10]:
# Join the characters along the last axis to get whole strings back.
tf.strings.reduce_join(chars, axis=-1).numpy()

array([b'abcdefg', b'xyz'], dtype=object)

In [11]:
def text_from_ids(ids):
    """Look up the characters for `ids` and join them along the last axis."""
    looked_up_chars = chars_from_ids(ids)
    joined = tf.strings.reduce_join(looked_up_chars, axis=-1)
    return joined

In [12]:
# Encode the entire corpus: split it into characters, then look up each id.
# The result is a 1-D tensor with one id per character.
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
all_ids

<tf.Tensor: shape=(1647133,), dtype=int64, numpy=array([20, 36, 51, ..., 36, 60,  3])>

In [13]:
# Wrap the id tensor in a tf.data.Dataset that yields one character id per element.
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [14]:
# Print the first 15 characters of the stream, one per line.
# A dedicated loop variable is used on purpose: naming it `ids` would
# overwrite the example RaggedTensor `ids` created earlier in the notebook
# with a scalar, so re-running the earlier round-trip cell would silently
# operate on different data.
for char_id in ids_dataset.take(15):
    print(chars_from_ids(char_id).numpy().decode('utf-8'))

H
a
p
p
y
 
V
a
l
e
n
t
i
n
e


In [15]:
# Number of characters in each training input; the id stream is later cut
# into chunks of seq_length+1 so that input and target can be offset by one.
seq_length = 100

In [16]:
# Group the id stream into chunks of seq_length+1 ids; drop_remainder=True
# discards the final chunk if it is shorter than that.
sequences = ids_dataset.batch(seq_length+1, drop_remainder=True)

# Show the first chunk as individual characters.
for seq in sequences.take(1):
    print(chars_from_ids(seq))

tf.Tensor(
[b'H' b'a' b'p' b'p' b'y' b' ' b'V' b'a' b'l' b'e' b'n' b't' b'i' b'n'
 b'e' b'\xe2\x80\x99' b's' b' ' b'D' b'a' b'y' b' ' b't' b'o' b' ' b't'
 b'h' b'e' b' ' b'm' b'o' b's' b't' b' ' b's' b'p' b'e' b'c' b'i' b'a'
 b'l' b' ' b'p' b'e' b'r' b's' b'o' b'n' b' ' b'i' b'n' b' ' b'm' b'y'
 b' ' b'l' b'i' b'f' b'e' b'.' b'\n' b'\n' b'I' b' ' b'l' b'o' b'v' b'e'
 b' ' b'y' b'o' b'u' b' ' b'm' b'o' b'r' b'e' b' ' b't' b'h' b'a' b'n'
 b' ' b'p' b'i' b'z' b'z' b'a' b'.' b'\n' b'\n' b'H' b'e' b'r' b'e' b"'"
 b's' b' ' b't' b'o' b' '], shape=(101,), dtype=string)


In [17]:
# Show the first five chunks joined back into (byte) strings.
for seq in sequences.take(5):
    print(text_from_ids(seq).numpy())

b"Happy Valentine\xe2\x80\x99s Day to the most special person in my life.\n\nI love you more than pizza.\n\nHere's to "
b"being my emergency contact someday.\n\nYou're my everything. Happy Valentine's Day!\n\nAre you a banana? "
b"Because I find you a-peel-ing.\n\nHappy Valentine's Day, handsome.\n\nYou're the only person I send heart"
b' eye emojis to.\n\nValentine, you take my breath away, every single day.\n\nIt\xe2\x80\x99s just one day in the year'
b', but you should know that I love you every day and every moment.\n\nThe more time we spend together, t'


In [18]:
def split_input_target(sequence):
    """Split a sequence into an (input, target) pair offset by one position.

    The input is every element except the last; the target is every element
    except the first, so target[i] is the element that follows input[i].
    """
    return sequence[:-1], sequence[1:]

In [19]:
# Quick check of the helper on a plain Python list of characters.
split_input_target(list("Tensorflow"))

(['T', 'e', 'n', 's', 'o', 'r', 'f', 'l', 'o'],
 ['e', 'n', 's', 'o', 'r', 'f', 'l', 'o', 'w'])

In [20]:
# Turn every chunk of ids into an (input, target) pair.
dataset = sequences.map(split_input_target)

In [21]:
# Decode the first (input, target) pair to confirm the one-character offset.
for input_example, target_example in dataset.take(1):
    print("Input :", text_from_ids(input_example).numpy())
    print("Target:", text_from_ids(target_example).numpy())

Input : b"Happy Valentine\xe2\x80\x99s Day to the most special person in my life.\n\nI love you more than pizza.\n\nHere's to"
Target: b"appy Valentine\xe2\x80\x99s Day to the most special person in my life.\n\nI love you more than pizza.\n\nHere's to "


2023-02-14 16:31:46.968859: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


In [22]:
# Batch size
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

# Build the training pipeline from `sequences` instead of reassigning
# `dataset` from itself. This keeps the cell idempotent: re-running it no
# longer shuffles and batches an already-batched dataset.
# tf.data.AUTOTUNE is the non-experimental name of the prefetch constant.
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(tf.data.AUTOTUNE))

dataset

<PrefetchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>

In [23]:
# Length of the vocabulary in StringLookup Layer
# (taken from the layer, not from `vocab`, so it matches the ids the layer emits)
vocab_size = len(ids_from_chars.get_vocabulary())

# The embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

In [24]:
class MyModel(tf.keras.Model):
    """Character-level model: Embedding -> GRU -> Dense over the vocabulary.

    Args:
        vocab_size: Number of distinct ids; used as the embedding input size
            and as the width of the output layer.
        embedding_dim: Size of each embedding vector.
        rnn_units: Number of GRU units.
    """

    def __init__(self, vocab_size, embedding_dim, rnn_units):
        # The base initializer takes no positional arguments here; the original
        # `super().__init__(self)` passed the instance as a stray extra argument.
        super().__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        # return_sequences gives an output per timestep; return_state also
        # returns the final state so generation can carry it between calls.
        self.gru = tf.keras.layers.GRU(rnn_units,
                                       return_sequences=True,
                                       return_state=True)
        self.dense = tf.keras.layers.Dense(vocab_size)

    def call(self, inputs, states=None, return_state=False, training=False):
        """Run the model on a batch of id sequences.

        Args:
            inputs: Batch of token ids (assumed shape (batch, sequence) --
                matches how the notebook calls it).
            states: Initial GRU state. When None, the GRU's own initial state
                for this batch is used.
            return_state: When True, also return the final GRU state.
            training: Forwarded to every layer.

        Returns:
            The dense layer's output for every timestep, or a tuple
            (output, states) when `return_state` is True.
        """
        x = inputs
        x = self.embedding(x, training=training)
        if states is None:
            states = self.gru.get_initial_state(x)
        x, states = self.gru(x, initial_state=states, training=training)
        x = self.dense(x, training=training)

        if return_state:
            return x, states
        else:
            return x

In [25]:
# Instantiate the model with the hyperparameters chosen above.
model = MyModel(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

In [26]:
# Run a single batch through the untrained model to check the output shape.
# The loop variables and the predictions are reused by the cells below.
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 64) # (batch_size, sequence_length, vocab_size)


In [27]:
# Layer-by-layer parameter counts (the model was called once above).
model.summary()

Model: "my_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       multiple                  16384     
                                                                 
 gru (GRU)                   multiple                  3938304   
                                                                 
 dense (Dense)               multiple                  65600     
                                                                 
Total params: 4,020,288
Trainable params: 4,020,288
Non-trainable params: 0
_________________________________________________________________


In [28]:
# Sample one id per timestep from the first example's predictions, rather
# than taking the argmax, then drop the trailing num_samples axis.
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()

In [29]:
# Display the sampled ids (one per timestep of the first example).
sampled_indices

array([48, 56, 47, 51, 53, 51,  0,  2,  7, 29,  9, 54, 20, 19, 30, 20, 30,
       18,  2, 31, 53, 58, 20, 16,  9, 38, 12, 48,  1,  9, 29, 32, 38, 55,
       33,  2, 63,  1, 47,  8, 32,  6, 13, 51, 52, 49, 14, 44, 13,  6, 16,
       28, 18, 48,  3, 54,  4, 13, 12, 47, 30, 53, 58, 16,  1, 15, 30, 59,
       60, 30, 36, 13, 22,  6, 35, 21,  2, 35, 36,  5, 51, 27, 43, 32, 24,
        1, 46, 62, 41, 36, 15, 57, 15, 35, 20, 26, 31, 18,  4, 53])

In [30]:
# Decode the first input and the ids sampled from the untrained model.
print("Input:\n", text_from_ids(input_example_batch[0]).numpy())
print()
print("Next Char Predictions:\n", text_from_ids(sampled_indices).numpy())

Input:
 b"Day, handsome.\n\nYou're the only person I send heart eye emojis to.\n\nValentine, you take my breath aw"

Next Char Predictions:
 b"mulprp[UNK] ,R.sHGSHSF TrwHD.c?m\n.RUctV \xe2\x80\x99\nl-U)ApqnBiA)DPFm!s'A?lSrwD\nCSxySaAJ)YI Ya(pOhUL\nk\xe2\x80\x94faCvCYHNTF'r"


In [31]:
# Targets are integer ids and the model outputs unnormalized scores,
# hence the sparse loss with from_logits=True.
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

In [32]:
# Evaluate the loss of the untrained model on the example batch.
example_batch_mean_loss = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("Mean loss:        ", example_batch_mean_loss)

Prediction shape:  (64, 100, 64)  # (batch_size, sequence_length, vocab_size)
Mean loss:         tf.Tensor(4.158709, shape=(), dtype=float32)


In [33]:
# Exponentiate the mean loss to compare it with the vocabulary size:
# an untrained model predicting near-uniformly should land close to it.
tf.exp(example_batch_mean_loss).numpy()

63.98886

In [34]:
# Configure training: Adam optimizer with the loss defined above.
model.compile(optimizer='adam', loss=loss)

In [35]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files; "{epoch}" is left as a placeholder in the
# path for ModelCheckpoint to fill in with the epoch number.
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Save only the weights (not the full model) during training.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [36]:
# Number of passes over the training dataset.
EPOCHS = 50

In [37]:
# Train the model, writing checkpoints through the callback defined above.
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [38]:
class OneStep(tf.keras.Model):
    """Wraps a trained model to generate text one character at a time.

    Args:
        model: The trained model; called with `inputs`, `states` and
            `return_state=True`.
        chars_from_ids: Lookup layer mapping ids to characters.
        ids_from_chars: Lookup layer mapping characters to ids.
        temperature: Divisor applied to the logits before sampling.
    """

    def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
        super().__init__()
        self.temperature = temperature
        self.model = model
        self.chars_from_ids = chars_from_ids
        self.ids_from_chars = ids_from_chars

        # Build an additive mask over the vocabulary with -inf at the id of
        # "[UNK]", so that "[UNK]" can never be sampled. Entries not listed in
        # the sparse tensor take tf.sparse.to_dense's default value.
        unk_ids = self.ids_from_chars(['[UNK]'])[:, None]
        neg_inf = -float('inf')
        vocabulary_length = len(ids_from_chars.get_vocabulary())
        unk_mask = tf.SparseTensor(
            indices=unk_ids,
            values=[neg_inf]*len(unk_ids),
            # One mask entry per vocabulary item.
            dense_shape=[vocabulary_length])
        self.prediction_mask = tf.sparse.to_dense(unk_mask)

    @tf.function
    def generate_one_step(self, inputs, states=None):
        """Sample the next character for each input string.

        Args:
            inputs: String tensor; each entry is split into UTF-8 characters.
            states: Model state from the previous step, or None on the first.

        Returns:
            A tuple (predicted_chars, states): the sampled next character for
            each input and the state returned by the model.
        """
        # Strings -> characters -> token ids (dense tensor).
        split_chars = tf.strings.unicode_split(inputs, 'UTF-8')
        token_ids = self.ids_from_chars(split_chars).to_tensor()

        # logits has shape [batch, char, next_char_logits]
        logits, states = self.model(inputs=token_ids, states=states,
                                    return_state=True)

        # Keep only the last position, scale by the temperature, then add the
        # mask that rules out "[UNK]".
        last_logits = logits[:, -1, :]
        last_logits = last_logits/self.temperature
        last_logits = last_logits + self.prediction_mask

        # Draw one id per batch entry and drop the num_samples axis.
        sampled_ids = tf.squeeze(
            tf.random.categorical(last_logits, num_samples=1), axis=-1)

        # Ids -> characters, returned together with the model state.
        return self.chars_from_ids(sampled_ids), states
    
# Single-step generator around the trained model (default temperature).
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

In [46]:
# Ask for the seed phrase and how many characters to generate.
inputstring = input("Enter the first phrase :) - ")
lengths = int(input("Enter number of characters - "))
print(" ")
# Time the generation with perf_counter(): it is monotonic, so the measured
# interval cannot be distorted by system clock adjustments the way a
# time.time() difference can.
start = time.perf_counter()
states = None
next_char = tf.constant([inputstring])
result = [next_char]

# Feed each sampled character back in, carrying the model state along.
for n in range(lengths):
    next_char, states = one_step_model.generate_one_step(next_char, states=states)
    result.append(next_char)

# Concatenate the seed and all sampled characters into one string tensor.
result = tf.strings.join(result)
end = time.perf_counter()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)

Enter the first phrase :) - Adya, i love you. You are perfect.
Enter number of characters - 1000
 
Adya, i love you. You are perfect.

I desire you my love in every moment.
As time adds years like sands on the beach,
When you sense your beauty slipping to the wind,
I will want you so much more than now.

I could watch the sunrise, I could watch the sunset.
I could walk in the rain without getting wet.
I could witness their beauty as the flowers bloom.
I could I could listen to bird songs as the new day dawns.
I could feel the sunshine through the darkest clouds.
I could hear a sweet whisper through the roar of a crowd.
I could walk on water without soaking my feet.
I could travel the world in less than a week.
I could ponder the knowledge passed down by the wise.
I could dwell in a castle built in the sky.
I could paint a portrait and bring it to life.
I could sleep without pain, on the edge of a knife.
I could play a sweet melody to soften the mood.
I could take all the bad and turn i

In [47]:
# Export the single-step generator to the 'one_step' directory as a
# SavedModel, then load it back.
tf.saved_model.save(one_step_model, 'one_step')
one_step_reloaded = tf.saved_model.load('one_step')





INFO:tensorflow:Assets written to: one_step/assets


INFO:tensorflow:Assets written to: one_step/assets


In [52]:
# Generate 1000 characters with the reloaded model, starting from a fixed seed.
states = None
next_char = tf.constant(['Adya '])
result = [next_char]

# Feed each sampled character back in, carrying the model state along.
for n in range(1000):
    next_char, states = one_step_reloaded.generate_one_step(next_char, states=states)
    result.append(next_char)

# Join the seed and the sampled characters and print the decoded text.
print(tf.strings.join(result)[0].numpy().decode("utf-8"))

Adya ust of your kiss.

I desire you when the sun centers the sky
And shadows hide beneath the soles of feet.
When butterflies dance among petals white,
I want the sparkle of your smiling eyes.

I desire you as the moon welcomes the night
And skies are flooded with twinkling bright stars.
When candles flicker their last silent breath,
I then want to feel your sensuous touch.

I desire you when passion swells all your soul
And your lovely face shines with radiance.
When your moist lips are full of love's desire,
I want to feel every pulse of your heart.

I desire you when you are weary and down
And gray skies sprinkle droplets of sad tears.
When the world laughs at your every stumble,
I want to gently sooth your sorrows.

I desire you my love in every moment.
As time adds years like sands on the beach,
When you sense your beauty slipping to the wind,
I will want you so much more than now.

I could watch the sunrise, I could watch the sunset.
I could walk in the rain without getting wet.