In [1]:
import os
import time
import numpy as np
import tensorflow as tf

In [2]:
path_to_file = tf.keras.utils.get_file(
    'shakespeare.txt',
    'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
)
     

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt
[1m1115394/1115394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1us/step


In [3]:
text = open(path_to_file, 'rb').read().decode(encoding='utf-8')
print(f"Lenght of text: {len(text)} characters")

Lenght of text: 1115394 characters


In [4]:
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [5]:
vocab = sorted(set(text))
print(f'{len(vocab)} unique characters')

65 unique characters


In [6]:
example_text = ['stefan', 'nafets']

chars = tf.strings.unicode_split(example_text, input_encoding='UTF-8')
chars
     

<tf.RaggedTensor [[b's', b't', b'e', b'f', b'a', b'n'],
 [b'n', b'a', b'f', b'e', b't', b's']]>

In [7]:
ids_from_chars = tf.keras.layers.StringLookup(
    vocabulary=list(vocab),
    mask_token=None
)

In [8]:
ids = ids_from_chars(chars)
ids

<tf.RaggedTensor [[58, 59, 44, 45, 40, 53],
 [53, 40, 45, 44, 59, 58]]>

In [9]:
chars_from_ids = tf.keras.layers.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(),
    invert=True,
    mask_token=None
)

In [10]:
chars = chars_from_ids(ids)
chars

<tf.RaggedTensor [[b's', b't', b'e', b'f', b'a', b'n'],
 [b'n', b'a', b'f', b'e', b't', b's']]>

In [11]:
tf.strings.reduce_join(chars, axis=1).numpy()
     

array([b'stefan', b'nafets'], dtype=object)

In [12]:
def text_from_ids(ids):
  return tf.strings.reduce_join(chars_from_ids(ids), axis=0)

In [13]:
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
all_ids

<tf.Tensor: shape=(1115394,), dtype=int64, numpy=array([19, 48, 57, ..., 46,  9,  1], dtype=int64)>

In [14]:
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)
     

In [15]:
for ids in ids_dataset.take(10):
  print(chars_from_ids(ids).numpy().decode('UTF-8'))

F
i
r
s
t
 
C
i
t
i


In [16]:
seq_length = 100
examples_per_epoch = len(text) // (seq_length + 1)
     

In [17]:
sequences = ids_dataset.batch(seq_length + 1, drop_remainder=True)

for seq in sequences.take(1):
  print(chars_from_ids(seq))

tf.Tensor(
[b'F' b'i' b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':'
 b'\n' b'B' b'e' b'f' b'o' b'r' b'e' b' ' b'w' b'e' b' ' b'p' b'r' b'o'
 b'c' b'e' b'e' b'd' b' ' b'a' b'n' b'y' b' ' b'f' b'u' b'r' b't' b'h'
 b'e' b'r' b',' b' ' b'h' b'e' b'a' b'r' b' ' b'm' b'e' b' ' b's' b'p'
 b'e' b'a' b'k' b'.' b'\n' b'\n' b'A' b'l' b'l' b':' b'\n' b'S' b'p' b'e'
 b'a' b'k' b',' b' ' b's' b'p' b'e' b'a' b'k' b'.' b'\n' b'\n' b'F' b'i'
 b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':' b'\n' b'Y'
 b'o' b'u' b' '], shape=(101,), dtype=string)


In [18]:
for seq in sequences.take(5):
  print(text_from_ids(seq).numpy())

b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
b'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'
b"now Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us ki"
b"ll him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be d"
b'one: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citi'


In [19]:
def split_input_target(sequence):
  input_text = sequence[:-1]
  target_text = sequence[1:]
  return input_text, target_text

In [20]:
split_input_target(list('Tensorflow'))

(['T', 'e', 'n', 's', 'o', 'r', 'f', 'l', 'o'],
 ['e', 'n', 's', 'o', 'r', 'f', 'l', 'o', 'w'])

In [21]:
dataset = sequences.map(split_input_target)

In [22]:
for input_example, target_example in dataset.take(1):
  print('Input  :', text_from_ids(input_example).numpy())
  print('Target :', text_from_ids(target_example).numpy())

Input  : b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
Target : b'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


In [23]:
# Batch size
BATCH_SIZE = 63

"""Buffer size to shuffle the dataset
(TF data is designed to work wiht possobly infinite sequences,
so it dosen't attempt to shuffle the entire sequence in memory. Instadfe
it maintaines a buffer is which it shuffles elements)"""
BUFFER_SIZE = 10000

dataset = (
    dataset.shuffle(BUFFER_SIZE) \
    .batch(BATCH_SIZE, drop_remainder=True) \
    .prefetch(tf.data.experimental.AUTOTUNE)
)
dataset

<_PrefetchDataset element_spec=(TensorSpec(shape=(63, 100), dtype=tf.int64, name=None), TensorSpec(shape=(63, 100), dtype=tf.int64, name=None))>

In [32]:
class MyModel(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, rnn_units):
        super(MyModel, self).__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(
            rnn_units,
            return_sequences=True,
            return_state=True
        )
        self.dense = tf.keras.layers.Dense(vocab_size)

    def call(self, inputs, states=None, return_state=False, training=False):
        x = self.embedding(inputs)
        
        # Pass inputs through GRU layer
        x, states = self.gru(x, initial_state=states, training=training)
        
        # Pass GRU outputs through Dense layer
        x = self.dense(x)

        if return_state:
            return x, states
        else:
            return x


In [33]:
model.summary()

In [40]:
import tensorflow as tf

class MyModel(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, rnn_units):
        super().__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(
            rnn_units,
            return_sequences=True,
            return_state=True
        )
        self.dense = tf.keras.layers.Dense(vocab_size)
    
    def call(self, inputs, states=None, return_state=False, training=False):
        x = self.embedding(inputs)

        # Initialize states if not provided
        if states is None:
            batch_size = tf.shape(inputs)[0]  # Get the batch size from the inputs
            states = tf.zeros((batch_size, self.gru.units))  # Initialize the states manually with the correct shape

        # Run GRU layer
        x, states = self.gru(x, initial_state=states, training=training)

        # Output dense layer
        x = self.dense(x)

        # Return both outputs and states if required
        if return_state:
            return x, states
        else:
            return x

# Assuming the vocabulary size, embedding dimension, and RNN units are set
vocab_size = 10000
embedding_dim = 256
rnn_units = 1024

# Create the model
model = MyModel(vocab_size, embedding_dim, rnn_units)

# Test the model on an input batch
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(
        example_batch_predictions.shape,
        "# (batch_size, sequence_length, vocab_size)"
    )


(63, 100, 10000) # (batch_size, sequence_length, vocab_size)


In [42]:
# Assuming you already have a batch of predictions
for input_example_batch, target_example_batch in dataset.take(1):
    # Get predictions from the model for the input batch
    example_batch_predictions = model(input_example_batch)
    
    # Print the shape of the output predictions
    print(
        example_batch_predictions.shape,
        "# (batch_size, sequence_length, vocab_size)"
    )
    
    # Sampling the next character predictions from the logits
    sampled_indices = tf.random.categorical(
        example_batch_predictions[0],  # Use the first sample in the batch
        num_samples=1
    )
    
    # Squeeze to remove extra dimensions
    sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()
    
    # Print input and sampled next characters
    print('Input:', text_from_ids(input_example_batch[0]).numpy(), sep='\n')
    print('Next Char Predictions:', text_from_ids(sampled_indices).numpy(), sep='\n')


(63, 100, 10000) # (batch_size, sequence_length, vocab_size)
Input:
b', his unfeigned friend,\nThat, if King Lewis vouchsafe to furnish us\nWith some few bands of chosen so'
Next Char Predictions:
b'[UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK]s[UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK][UNK]'


In [43]:
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
     

In [44]:
example_batch_mean_loss = loss(target_example_batch, example_batch_predictions)
print(
    'Prediciton shape: ',
    example_batch_predictions.shape,
    '# (batch_size, sequence_length, vocab_size',
    sep='\n'
)
print('New loss:        ', example_batch_mean_loss)

Prediciton shape: 
(63, 100, 10000)
# (batch_size, sequence_length, vocab_size
New loss:         tf.Tensor(9.210249, shape=(), dtype=float32)


In [45]:
tf.exp(example_batch_mean_loss).numpy()
     

9999.086

In [46]:
model.compile(optimizer='adam', loss=loss)

In [49]:
# Name of the checkpoint file with the correct extension
checkpoint_prefix = os.path.join(checkpoint_path, "ckpt_epoch_{epoch:02d}.weights.h5")

# Create the ModelCheckpoint callback
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True  # Corrected argument
)


In [50]:
%%time
EPOCHS = 10
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/10
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2600s[0m 15s/step - loss: 4.0092
Epoch 2/10
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3051s[0m 17s/step - loss: 2.6908
Epoch 3/10
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1486s[0m 8s/step - loss: 2.4985
Epoch 4/10
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2351s[0m 13s/step - loss: 2.4103
Epoch 5/10
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2226s[0m 13s/step - loss: 2.3539
Epoch 6/10
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4362s[0m 25s/step - loss: 2.3189
Epoch 7/10
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3643s[0m 21s/step - loss: 2.3025
Epoch 8/10
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21161s[0m 121s/step - loss: 2.2774
Epoch 9/10
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1499s[0m 9s/step - loss: 2.2619
Epoch 10/10
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

In [81]:
class OneStep(tf.keras.Model):
    def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
        super().__init__()
        self.temperature = temperature
        self.model = model
        self.chars_from_ids = chars_from_ids
        self.ids_from_chars = ids_from_chars

        # Create a mask to prevent "[UNK]" from being generated.
        skip_ids = self.ids_from_chars(["[UNK]"])[:, None]
        sparse_mask = tf.SparseTensor(
            # Put a -inf at each bad index.
            values=[-float("inf")] * len(skip_ids),
            indices=skip_ids,
            # Match the shape to the vocabulary
            dense_shape=[len(ids_from_chars.get_vocabulary())],
        )
        self.prediction_mask = tf.sparse.to_dense(sparse_mask)

    @tf.function
    def generate_one_step(self, inputs, states=None):
        # Convert strings to token IDs.
        input_chars = tf.strings.unicode_split(inputs, "UTF-8")
        input_ids = self.ids_from_chars(input_chars).to_tensor()

        # Run the model.
        # predicted_logits.shape is [batch, char, next_char_logits]
        predicted_logits, states = self.model(
            inputs=input_ids, states=states, return_state=True
        )
        # Only use the last prediction.
        predicted_logits = predicted_logits[:, -1, :]
        predicted_logits = predicted_logits / self.temperature
        # Apply the prediction mask: prevent "[UNK]" from being generated.
        predicted_logits = predicted_logits + self.prediction_mask

        # Sample the output logits to generate token IDs.
        predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
        predicted_ids = tf.squeeze(predicted_ids, axis=-1)

        # Convert from token ids to characters
        predicted_chars = self.chars_from_ids(predicted_ids)

        # Return the characters and model state.
        return predicted_chars, states
     


In [82]:
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

In [83]:
start = time.time()
states = None
next_char = tf.constant(['ROMEO:'])
result = [next_char]

for n in range(1000):
    next_char, states = one_step_model.generate_one_step(
        next_char, states = states
    )
    result.append(next_char)

result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), "\n\n" + "_" * 80)
print("\nRun time:", end - start)

ROMEO:
Anding Loun croke you, wase't Hars, triengay theed mastanges, maskeN to hif's fon thitherf serme sheuch your
that eed a po ofpered that in
Whous sailn, fom the be thum, and to herret but sour.

MENVINO:
On the manning I dirg'ds:
Thes sifvit nik o foll the sime tith liff dake:

astile I hey, blieg watie vame.

COLIO:
O, youl dease.

MURIUS:
Oo speef,
Lo lond I ho knomand, sher quke
Seepale the may brees grears,
Whe lavening werver, anv thes the toous?

MORNCIO:
Mestlen, 't is then wish:
Then thie mroun of lose thes your.

DUKES:
Se mo eve
Ad Voodf the serifnout
Do the kinc, thould stawid is mave ig gho dain? bit too,
Ipatet with crees, the lass ig
Oven, eakn live, me youn beat dere, e brow his denens.

CETon his not gomtherint to theid deate tond.
The prave arr me treich ou mor, makn's aigh,
Thot, there in in Palconce a cojnicoong?

HUCHIO:
Ho der a woms fnom yrem on uther reesing, thenm-patice gonoropest he alm ad of that,
Bat hart, fee wher lathert to dorcebe Nother, Ghange my 

In [84]:
start = time.time()
states = None
next_char = tf.constant(['ROMEO', 'ROMEO', 'ROMEO', 'ROMEO', 'ROMEO' ])
result = [next_char]

for n in range(1000):
    next_char, states = one_step_model.generate_one_step(
        next_char, states = states
    )
    result.append(next_char)

result = tf.strings.join(result)
end = time.time()
print(result, '\n\n' + "_" * 80)
print("\nRun time", end - start)
     


tf.Tensor(
[b"ROMEO:\nGut Morpery beecr's megeforge, and paining.\n\nATROUE:\nO sord for aver oface, E\nTove putsst thy Onjuw de tollf 'to ther whent then ence\nthee yow thome asverd\nThe mxeme, all theme if bete, ard Sat yiur.\nBus htarkied, dy tour that ucnctope.\n\nGRRINGULES:\nFor evulles fishendy bent and, ghath hat hese my bolt,?\nNut me seld apr sent weis nead how\nyour me noth lile vist tod gate foble Kisn?\nBuse ofreers, and gads, slain rawn!\n\nABENLERI:\nI sau, wow got my doo, Triund recins to misperous, thou hamak\nTo haved gon'd a maylf; the devem now\nIL\nYom.\n\nCRUUCINI EFNAN:\nWhe the intery gore we porn.\nHerurl wid loung, frith ta tho kidg dean of\nThat'ernss bed, to mest un there curorr brownes and the prace offorce.\nUSThe go,\nWill mins wom! an the ken wrich whin.\nHever, you, hag mese with, preacarse theiser, in farourd\nay youg wisben my preate,\nAnd shain, pray sill eak\nyou too in the peut love freachscorcast.\n\nARENTES:\n'dain: it od ut king fod taden, Iwan 

In [86]:
@tf.function(input_signature=[tf.TensorSpec(shape=[None], dtype=tf.string)])
def serving_fn(inputs):
    # Your serving logic here
    return one_step_model(inputs)


In [89]:
# Define a simple model
class SimpleModel(tf.keras.Model):
    def __init__(self):
        super(SimpleModel, self).__init__()
        self.dense = tf.keras.layers.Dense(10)

    def call(self, inputs):
        return self.dense(inputs)

# Instantiate and save the model
model = SimpleModel()
tf.saved_model.save(model, 'one_step')  # Ensure this path is correct


INFO:tensorflow:Assets written to: one_step\assets


INFO:tensorflow:Assets written to: one_step\assets


In [93]:
import tensorflow as tf

# Load the model
one_step_reloaded = tf.saved_model.load('one_step')

# Check if the model is loaded correctly
print(one_step_reloaded)


<tensorflow.python.saved_model.load.Loader._recreate_base_user_object.<locals>._UserObject object at 0x000001A48CF5AA50>


In [95]:
import tensorflow as tf

# Define your model class with custom methods
class MyModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        # Define layers, etc.
    
    def call(self, inputs):
        # Define the forward pass
        return inputs  # Replace with actual model logic

    @tf.function
    def generate_one_step(self, inputs, states=None):
        # Define the method for generating text
        return self(inputs)  # Replace with actual logic

# Instantiate and save the model
my_model = MyModel()
save_path = 'one_step'
tf.saved_model.save(my_model, save_path)

# Load the model
loaded_model = tf.saved_model.load(save_path)

# Inspect methods of the loaded model
print(dir(loaded_model))


INFO:tensorflow:Assets written to: one_step\assets


INFO:tensorflow:Assets written to: one_step\assets


['__annotations__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_add_trackable_child', '_add_variable_with_custom_getter', '_checkpoint_adapter', '_checkpoint_dependencies', '_copy_trackable_to_cpu', '_deferred_dependencies', '_delete_tracking', '_deserialization_dependencies', '_deserialize_from_proto', '_export_to_saved_model_graph', '_gather_saveables_for_checkpoint', '_handle_deferred_dependencies', '_inbound_nodes', '_lookup_dependency', '_loss_ids', '_losses', '_losses_override', '_maybe_initialize_trackable', '_name_based_attribute_restore', '_name_based_restores', '_no_dependency', '_object_identifier', '_outbound_nodes', '_preload_simple_restoration', '_restore_from

In [79]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
)

In [80]:
model.fit(dataset, epochs=1)

[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1582s[0m 9s/step - loss: 2.4950


<keras.src.callbacks.history.History at 0x1a48c9f6f00>