In [1]:
import os
import numpy as np
import re
import shutil
import tensorflow as tf
import gensim.downloader as api

In [2]:
DATA_DIR = './data'
CHECKPOINT_DIR = os.path.join(DATA_DIR, "checkpoints")

In [3]:
def download_and_read(urls):
    texts =[]
    for i, url in enumerate(urls):
        p = tf.keras.utils.get_file("ex1-{:d}.txt".format(i), url, cache_dir = ".")
        text = open(p, "r", encoding = 'utf-8').read()
        #remove byte order mark
        text = text.replace("\ufeff", "")
        #remove newlines
        text = text.replace("\n", " ")
        text = re.sub(r'\s+', ' ', text)
        texts.extend(text)
    return texts

In [4]:
texts = download_and_read(["http://www.gutenberg.org/cache/epub/28885/pg28885.txt",
 "https://www.gutenberg.org/files/12/12-0.txt"
])

In [5]:
vocab = sorted(set(texts))
print("vocab size: {:d}".format(len(vocab)))

char2idx = {c:i for i,c in enumerate(vocab)}
idx2char = {i:c for c,i in char2idx.items()}

vocab size: 92


In [6]:
texts_as_ints = np.array([char2idx[c] for c in texts])
data = tf.data.Dataset.from_tensor_slices(texts_as_ints)

# number of characters to show before asking for prediction
#sequences: [None, 100]
seq_length = 100
sequences = data.batch(seq_length+1, drop_remainder = True)

In [7]:
# EMBEDDING_MODEL = api.load('glove-wiki-gigaword-300')

In [8]:
# def build_embedding_matrix(EMBEDDING_MODEL, word2idx, EMBEDDING_DIM):
#     E = np.zeros((len(vocab), EMBEDDING_DIM))
#     for word, idx in word2idx.items():
#         try:
#             E[idx] = EMBEDDING_MODEL.word_vec(word)
#         except KeyError:
#             pass
#     return E

In [9]:
# E = build_embedding_matrix(EMBEDDING_MODEL, char2idx,300)

In [10]:
def split_train_labels(sequences):
    input_seq = sequences[0:-1]
    output_seq = sequences[1:]
    return input_seq, output_seq

In [11]:
sequences = sequences.map(split_train_labels)

In [12]:
for input_seq, output_seq in sequences.take(1):
    print("input:[{:s}]".format("".join([idx2char[i] for i in input_seq.numpy()])))
    print("output:[{:s}]".format("".join([idx2char[i] for i in output_seq.numpy()])))
    print(input_seq.shape)

input:[Project Gutenberg's Alice's Adventures in Wonderland, by Lewis Carroll This eBook is for the use of ]
output:[roject Gutenberg's Alice's Adventures in Wonderland, by Lewis Carroll This eBook is for the use of a]
(100,)


In [13]:
#set up for training
#batches: [None, 64, 100]
batch_size = 64
steps_per_epochs = len(texts) // batch_size // seq_length
dataset = sequences.shuffle(10000).batch(batch_size, drop_remainder = True)

In [14]:
class CharGenModel(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, num_timesteps, rnn_output_dim, **kwargs):
        super(CharGenModel, self).__init__(**kwargs)
        self.embedding_layer = tf.keras.layers.Embedding(
            vocab_size,
            embedding_dim,
#             input_length = seq_length,
#             weights = [E],
#             trainable = False
        )
        self.rnn_layer = tf.keras.layers.GRU(
            rnn_output_dim,
            recurrent_initializer = "glorot_uniform",
            recurrent_activation = 'sigmoid',
            stateful = True,
            return_sequences = True
        )
        self.dense_layer = tf.keras.layers.Dense(vocab_size)
        
    def call(self, x):
        x = self.embedding_layer(x)
        x = self.rnn_layer(x)
        x = self.dense_layer(x)
        return x

In [15]:
vocab_size = len(vocab)
embedding_dim = 256
rnn_output_dim = 100

model = CharGenModel(vocab_size, embedding_dim, seq_length, rnn_output_dim)
model.build(input_shape = (batch_size, seq_length))

In [16]:
def loss(labels, predictions):
    return tf.losses.sparse_categorical_crossentropy(labels, predictions, from_logits = True,)

In [17]:
model.compile(optimizer = tf.optimizers.Adam(), loss = loss)

In [18]:
def generate_text(model, prefix_string, char2idx, idx2char,num_chars_to_generate=1000, temperature=1.0):
    inpuT = [char2idx[s] for s in prefix_string]
    inpuT = tf.expand_dims(inpuT, 0)
    text_generated = []
    model.reset_states()
    for i in range(num_chars_to_generate):
        preds = model(inpuT)
        preds = tf.squeeze(preds, 0) / temperature
        # predict char returned by model
        pred_id = tf.random.categorical(
        preds, num_samples=1)[-1, 0].numpy()
        text_generated.append(idx2char[pred_id])
        # pass the prediction as the next input to the model
        inpuT = tf.expand_dims([pred_id], 0)
    return prefix_string + "".join(text_generated)

In [19]:
# try running some data through the model to validate dimensions
for input_batch, label_batch in dataset.take(1):
    pred_batch = model(input_batch)

print(pred_batch.shape)
assert(pred_batch.shape[0] == batch_size)
assert(pred_batch.shape[1] == seq_length)
assert(pred_batch.shape[2] == vocab_size)

(64, 100, 92)


In [20]:
epochs = 100
callbacks = [tf.keras.callbacks.TensorBoard(log_dir = './logs')]
for i in range(epochs//10):
    model.fit(dataset.repeat(), epochs = 10, steps_per_epoch = steps_per_epochs,
#              callbacks = [checkpoint_callback, tensorboard_callback]
              callbacks = callbacks
             )
    checkpoint_file = os.path.join(CHECKPOINT_DIR, "model_epoch_{:d}".format(i+1))
    model.save_weights(checkpoint_file)
    
    #create generative model using the trained model so far
    gen_model = CharGenModel(vocab_size, embedding_dim, seq_length, rnn_output_dim)
    gen_model.load_weights(checkpoint_file)
    gen_model.build(input_shape = (1, seq_length))
    
    print("after epoch: {:d}".format(i+1))
    print(generate_text(gen_model, "Alice", char2idx, idx2char))
    print("-"*15)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
after epoch: 1
Alice wering wopink one a spyillashinglate-diss. The Rey gor a crould comy thes sunded exeelf tlech cause bdearuteled shat fruy squri‘_ch” ellf and likesle atiol trees ewave got _Alice cang lyok a lous aght it hild marge at lance Gut!" Bo said you Re Queen so portes, I sint hathed how youmed dattinn a axpoor sookn. thut wase shemer freagars. 5'ND_Y R@BGpU TCElCYo7 Tucectruck:-" sars amse I readd in!” stoten Soow. Than a mibutes," "Alice_ I caning her of to _to seectoin a kent of gen aid dewnown farg get of thimall the bogake ufpy Lictiemally” samly Oniinting moprargh a prowntsmou popsely dave tildly arraid the avp!” “zese a make a marragwise. bf. In wancectowh; the Plokn out to “1.Bat, way be, the llas giter waspisilring one munse jurcaind bot,_ is with at mare eclose: "Haint, “and the radeen.”: Hake ‘I that sas to trealf rojecting." [Queen Ouc! Reat it slict. “

after epoch: 3
Alice. What's knd be another lates off," said go on the sarding beess, and sake at yon't they iss, and stood!" 7ad the Project Gutenbe's Doon, she talans 24 Quatter were began backs e inthing over flaut things the King. “What read his stroper Sermass whick on how in a back dished heargrash went, she smakied you’ll every again a go had not herself copying," said Alice: the Caterphily suepfortant all Ahister of the two way cale botely, “in the barch as the Queen time, when she dish in him for drimping him. One oby to Parch-” “Inain, Mar_. “On," Queen. “Is, In your ever maid!- know. Suver I did _cQuee_. "POwactedly, as if she was just it to a brease old hardleour eady-pporsion—or, she go her openwarks (Anl rates to: he sail in a lon lensain,” ald chooge, and their dears of the bokestan eather, "The Poots the cadibe; that she remarked. "Not taid to try to see anxithy pulen crach,” said Ablittle sarding, but Project_. Yes. “Project? and suddenten!" is must on hel. "That, rest

after epoch: 5
Alice could dening answer the toojed, I denain, and shreather head lave!" "Pufe, with all this making ise that here." one loacus with, which from her gapp a losventily: “So,” said Alice, and it was in a puttonce his happened?" "But she conked this very pieral FBoager!” “He wives _in an offerly. "Do _Why?’ _I’ve be go is very slat moss toos. [Alice dounnily out and kiend indea indinut ‘''me large pay.” “Her, you cand a anything derious silenced one of it, and watched a rockial way it, ase raimed the terms on his tong, "that Alice could take I can be the White Found think, a'd no use dream), back, What alf for,” that’l Alice, and it said one lookem one fart it word, and Alice For and minunitt which with beline_," so she fan tone: “but ‘Why_ aboct. So rughteaf: thats, that soulderse of copyrove really cours. A rule, white, it it was down to see, and said in her triady) in a mine--and then, old_ nein the wable emp take your putchimair stonked a comile was a look to betisle’d

after epoch: 7
Alice con’t begin offorphoor? Who said to, bees a Red, thiss to your firstations to keep they help ventures. ‘The Project Gutenberg romes the tome sells Alice was something was Alice," the Gryphon. "U-pry cavive from the pucdosely, diffucters was so tone; "and there make it makes the Wilever here, every down, and surning now, “In!” Alice more of get—or the setting what she had be inventing down in the rige!” “but _Ig's_ think quite qUMAjeck!" said Humpty eut-gune feetn’t was Alice," said thene what's am his coor_ s. “Oh, dienevards, to help kpponsion chile. You must brill such its to elecusear, place so that cry of again. The pig, then they wouldn’t mustrabe.” “What an astoes she had beaution and pice, and seembers tone the Projecurtsel. “Found her Nongrapy in a worldinute,” cried Alice I'm nay. Alice had (But down if wains, or her. "reles to herself, and she large many pinishpy? Now, lovoking it set, the penches stood sortty?" AMicong-breauld the firent!” but he plaught

after epoch: 9
Alice 200x, “Oh: "hught up a copy_ a publtion what she was anywheres it had fue, she sward and she hew incess to have donow know, "unish. “Said so Alice said, Und round so licentl of the damand to works in me there’s Chortspeasing a going to something the Knight producidering in the Project Gutenberg-tm Licermetter falls with cate-lied to my feever it a feot larger?” Alice said to it it clour that she sat quick! What eact, then the sture it, and she was child naute—” Hatterle upon in her use to anyoting it, so—_ mestandons per copying you limn?" "You mounted a tunger to dn another bark," "To you have leg to she only Cin she knute step_ Alice could reme here’s one it her,” Humpty), with his fite not axished in 7’ Alice quicks of her ot leave. Do the datch on comple Raper if you can't or at minute, said too: began know.” “How donain bealthe finity doise him and statu anxideately do a means gry. “Come! What—” (Alice fery," said the King asked, airing it is got to mouse stil