In [1]:
import tensorflow as tf
import numpy as np
import pathlib
import logging

from _create_model_util import create_character_model
from _dataset_util import create_dataset_for_training

## Configuration 🔧🔨🔨🔩

In [2]:
logging.basicConfig(level=logging.INFO)


URL = "../data/tinyshakespeare.txt"
BUFFER_SIZE = 10000
VOCAB_SIZE = 66
BLOCK_SIZE = 100
BATCH_SIZE = 64
DIMS = 512
NUM_HEADS = 4
FFNN_UNITS = 2048
DROPOUT_RATE = 0.2
NUM_DECODER_LAYERS = 4

## Loading Data

In [3]:

# getting data
dataset, mapper = create_dataset_for_training(
    url=URL, block_size=BLOCK_SIZE+1, batch_size=BATCH_SIZE, buffer_size=BUFFER_SIZE)


INFO:root:[TASK] >>>>> Create Training Dataset 💾
INFO:root:Reading-Data...
INFO:root:Creating-vocab-maps...
INFO:root:Mapping: map string to integer ids...
INFO:root:Creating fixed length sequences...
INFO:root:Fixed length Sequence created : block size = 101 
INFO:root:Creating-Datset: batch size = 64
INFO:root:[JOB FINISHED] >>>>> Training Dataset created 💾 ✅


## Creating Model 🤖

In [4]:

message = f'''
✅ Character Model Configuration 🤖🔧🔧\n
:::::::::::::::::::::::\n
📖VOCAB_SIZE = {VOCAB_SIZE}\n
🚥BLOCK_SIZE = {BLOCK_SIZE}\n
🚥BATCH_SIZE = {BATCH_SIZE}\n
🚥DIMS = {DIMS}\n
🔨NUM_HEADS = {NUM_HEADS}\n
🔩FFNN_UNITS = {FFNN_UNITS}\n
🪂DROPOUT_RATE = {DROPOUT_RATE}\n
🔧NUM_DECODER_LAYERS = {NUM_DECODER_LAYERS}\n
:::::::::::::::::::::::\n
'''
logging.info('[TASK] >>>>> Creating Character Model')
model = create_character_model(
    VOCAB_SIZE, BLOCK_SIZE, NUM_DECODER_LAYERS, NUM_HEADS, DIMS, FFNN_UNITS, DROPOUT_RATE)

logging.info(message)

INFO:root:[TASK] >>>>> Creating Character Model
INFO:root:[TASK] >>>>> Create Tensorflow character model 🤖
INFO:root:[JOB FINISHED] >>>>> Tensorflow character model 🤖 ✅
INFO:root:
✅ Character Model Configuration 🤖🔧🔧

:::::::::::::::::::::::

📖VOCAB_SIZE = 66

🚥BLOCK_SIZE = 100

🚥BATCH_SIZE = 64

🚥DIMS = 512

🔨NUM_HEADS = 4

🔩FFNN_UNITS = 2048

🪂DROPOUT_RATE = 0.2

🔧NUM_DECODER_LAYERS = 4

:::::::::::::::::::::::




In [5]:
# compile the model
logging.info('[COMPILE THE MODEL]: Adam as optimizer, SparseCategorialCrossentropy as loss-function')
model.compile(
    optimizer='adam', 
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'])


logging.info('Model Evaluation 🧾')
model.evaluate(dataset.take(10))


INFO:root:[COMPILE THE MODEL]: Adam as optimizer, SparseCategorialCrossentropy as loss-function
INFO:root:Model Evaluation 🧾




[5.0815534591674805, 0.023874999955296516]

In [6]:
model.load_weights('./saved_model')

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x19b071673d0>

In [7]:
logging.info('Model Evaluation 🧾')
model.evaluate(dataset.take(10))

INFO:root:Model Evaluation 🧾




[1.263580322265625, 0.6043437719345093]

## Model Inference

In [82]:
class TextGeneration(tf.Module):
    def __init__(self, model, mapper, block_size):
        super(TextGeneration, self).__init__()
        self.model = model
        self.mapper = mapper
        self.BLOCK_SIZE = block_size

    def process_input(self, data):
        characters = tf.strings.unicode_split(data, 'UTF-8')
        ragged = self.mapper['char_to_id'](characters)
        tensor = tf.keras.utils.pad_sequences(ragged.numpy(), maxlen=self.BLOCK_SIZE, value=2)
        return tensor

    def sampling(self, predictions):
        last = predictions[:, -1, :]
        samples = tf.random.categorical(logits=last, num_samples=1)
        return samples


    def __call__(self, inputs, n_iter=300):
        inputs = self.process_input(inputs)

        for i in tf.range(n_iter):
            # [batch, seq] --> [batch, seq, vocab-size]
            preds = self.model(inputs[:,-self.BLOCK_SIZE:])
            # [batch, seq, vocab-size] --> [batch, 1]
            outputs = self.sampling(preds)
            # concat inputs
            inputs = tf.concat(values=[inputs, outputs], axis=-1)
        
        
        outputs = self.mapper['id_to_str'](inputs)
        return outputs
        

In [83]:
text_gen = TextGeneration(model=model, mapper=mapper, block_size=BLOCK_SIZE)

In [10]:
tf_sample_inputs = [
"""GLOUCESTER:
Two of thy name, both Dukes of Somerset,
Have sold their lives unto the house of York.""",

"""KING EDWARD IV:
Now welcome more, and ten times more beloved,
Than if thou never hadst deserved our hate.""" ,

"""FRIAR LAURENCE:
Hence from Verona art thou banished:
Be patient, for the world is broad and wide."""  ,

"""
PETRUCHIO:
How but well, sir? how but well?
It were impossible I should speed amiss.

BAPTISTA:
"""
]

In [84]:
response = text_gen(tf_sample_inputs, n_iter=100)

In [88]:
for res in response:
    print(res.numpy().decode('utf-8'))
    print('-'*100)

  GLOUCESTER:
Two of thy name, both Dukes of Somerset,
Have sold their lives unto the house of York.

ELBOW:
Are yet think runk so say's numbility.

JOHN OF GAUNT:
And see the best propherous or no, t
----------------------------------------------------------------------------------------------------
EDWARD IV:
Now welcome more, and ten times more beloved,
Than if thou never hadst deserved our hate.

JULIET:
Tut, have I going in his head
disconclaims: If thou art our royalting-tast,
For kissh, nev
----------------------------------------------------------------------------------------------------
   FRIAR LAURENCE:
Hence from Verona art thou banished:
Be patient, for the world is broad and wide.

BRUTUS:
We'll devine, is newly succurpet:
Nor one deserved the maids, we'll come to know other.

B
----------------------------------------------------------------------------------------------------
   
PETRUCHIO:
How but well, sir? how but well?
It were impossible I should speed amiss.

BAPT

## Gist

In [60]:
ragged = tf.strings.unicode_split(tf_sample_inputs, 'UTF-8')
ragged =  mapper['char_to_id'](ragged)
fixed_length = tf.keras.utils.pad_sequences(ragged.numpy(), maxlen=100, value=2)

In [61]:
inputs = fixed_length
for i in range(300):
    # [batch, seq] --> [batch, seq, vocab_size]
    pred = model(inputs)
    # [batch, seq, vocab_size] --> [batch, 1]
    pred = tf.random.categorical(logits=pred[:,-1,:], num_samples=1)
    fixed_length = tf.concat(values=[fixed_length, pred], axis=-1)
    inputs = fixed_length[:,-100:]
    

In [64]:
final_output = mapper['id_to_str'](fixed_length)

In [90]:
print(final_output[0].numpy().decode('utf-8'))

  GLOUCESTER:
Two of thy name, both Dukes of Somerset,
Have sold their lives unto the house of York.

First Murderer:
Had now may heard a thing too the old runks
thread gave and goals pardon of early know
They among there; when so pretty in the city
Most were going to crowns' the nature of king.

LEONTES:
When show you live me?

JULIET:
I would see you his enough bare to his name;
But you have mus


In [91]:
print(final_output[1].numpy().decode('utf-8'))

EDWARD IV:
Now welcome more, and ten times more beloved,
Than if thou never hadst deserved our hate.
Faith, no, we do not summmon to heart: 'Tis come,' for me!
We kissed.
I know news the fives will trust keys;
He fair knows, number's as thy king, and oshe
silence give my gage, as She is nearder his seas fear?
Thou is for thus to the high: what fair sorts and sulls
last before so our turn are as co


In [92]:
print(final_output[2].numpy().decode('utf-8'))

   FRIAR LAURENCE:
Hence from Verona art thou banished:
Be patient, for the world is broad and wide.

FNGRIARD:
What she aswer
Is true approach they do pine.

PONIS:
A maid lade prepared among clamous brand some steads,
Though I takest or a mality;
Who now, my soul as Gremio is tunenown to our kisses.

PETRUCHIO:
Nay, Let me come more my great lord. O, but knock I fought and as this beauty,
Good a


In [93]:
print(final_output[2].numpy().decode('utf-8'))

   FRIAR LAURENCE:
Hence from Verona art thou banished:
Be patient, for the world is broad and wide.

FNGRIARD:
What she aswer
Is true approach they do pine.

PONIS:
A maid lade prepared among clamous brand some steads,
Though I takest or a mality;
Who now, my soul as Gremio is tunenown to our kisses.

PETRUCHIO:
Nay, Let me come more my great lord. O, but knock I fought and as this beauty,
Good a


In [94]:
print(final_output[3].numpy().decode('utf-8'))

   
PETRUCHIO:
How but well, sir? how but well?
It were impossible I should speed amiss.

BAPTISTA:
Great makes not to Petruchio, go sleep in their house.

PETRUCHIO:
I promise! prison, Signior Grant Jupy up.

VINCENTIO:
Aknows boy!

AUFIDIUS:
Not the extremity weep, how to they?

CLAUDIO:
The come;
And for burthen his loves honour truly, Aughs Tybalt you.

SICINIUS:
Where's depleases me?

Pedant:
