In [1]:
"""
Final project Neural Networks

Thijs van der Laan, s3986721
Aydan Yagublu, s3980804
Eric Brouwer, s3934640
Filipe Laitenberger, s3894479

Sources:

https://www.tensorflow.org/text/tutorials/text_generation 
(this TensorFlow tutorial served as inspiration for our model
and taught us how to build a language-processing model like this)

"""
import tensorflow as tf
from tensorflow.keras.layers.experimental import preprocessing
from pylatexenc.latex2text import LatexNodes2Text
import os

In [8]:
filename = 'NN_LN_text.txt'

# Read the LaTeX source as raw bytes and decode it as UTF-8.
# The context manager guarantees the file handle is closed again,
# even if reading or decoding fails.
with open(filename, 'rb') as latex_file:
    latex = latex_file.read().decode(encoding='utf-8')

# Convert the LaTeX markup to plain text
text = LatexNodes2Text().latex_to_text(latex)

In [9]:
# Rough corpus statistic: a "sentence" is anything between two full stops and
# a "word" is anything between two single spaces.
sentences = text.split(".")
words = text.split(" ")
print(f"Average sentence length: {len(words) / len(sentences)}")

Average sentence length: 20.486900206064174


In [10]:
# Every distinct character that occurs in the text, in sorted order
vocab = sorted(set(text))

# Lookup layer: character -> integer id
convertCharactersIntoIds = preprocessing.StringLookup(
    vocabulary=list(vocab),
    mask_token=None,
)

# Inverse lookup layer: integer id -> character.
# It is built from the forward layer's own vocabulary so both directions agree.
convertIdsIntoCharacters = preprocessing.StringLookup(
    vocabulary=convertCharactersIntoIds.get_vocabulary(),
    invert=True,
    mask_token=None,
)

In [11]:
# Split the text into single UTF-8 characters and map each one to its id
characters = tf.strings.unicode_split(text, 'UTF-8')
ids = convertCharactersIntoIds(characters)

# Stream of individual character ids
idsDataset = tf.data.Dataset.from_tensor_slices(ids)

# Group the stream into chunks of sequenceLength + 1 ids; the extra id is needed
# because every chunk is later split into an input and a shifted-by-one target.
# An incomplete final chunk is dropped.
sequenceLength = 100
sequences = idsDataset.batch(sequenceLength + 1, drop_remainder=True)

def splitSequenceTrainingExample(sequence):
    """Split a sequence into an (input, target) training pair.

    The input is the sequence without its last element and the target is the
    sequence without its first element, i.e. the input shifted by one position.
    Example: "Hello" -> input: "Hell", target: "ello".
    """
    return sequence[:-1], sequence[1:]

# Turn every sequence into an (input, target) pair to create the training dataset
dataset = sequences.map(splitSequenceTrainingExample)

# Number of training examples per batch
batchSize = 64

# Size of the buffer from which examples are drawn when shuffling
bufferSize = 10000

# Shuffle, then group into full batches (an incomplete last batch is dropped).
dataset = dataset.shuffle(bufferSize)
dataset = dataset.batch(batchSize, drop_remainder=True)

# Prefetch: while the model is executing training step n, the input pipeline is
# already reading the data for step n+1, so that the training time is reduced as
# much as possible.
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

In [12]:
# Number of distinct characters found in the text.
# NOTE(review): the model constructed below is sized from the StringLookup
# layer's vocabulary rather than from this value - confirm which one is intended
# before using this constant to size any layer.
vocabularySize = len(vocab)

# Model hyperparameters: width of the embedding vectors and number of RNN units
embeddingDimensions, rnnUnits = 256, 1024

class TextGenerationModel(tf.keras.Model):
  """Character-level language model: Embedding -> GRU -> Dense.

  The final dense layer has `vocabularySize` units, so for every input
  position the model produces one score per vocabulary entry.
  """

  def __init__(self, vocabularySize, embeddingDimensions, rnnUnits):
    """Create the three layers of the model.

    Args:
      vocabularySize: number of distinct token ids; used as the embedding's
        input size and as the number of units of the output layer.
      embeddingDimensions: size of each embedding vector.
      rnnUnits: number of units in the GRU layer.
    """
    # `super()` already binds the instance, so nothing is passed here.
    # Passing `self` again hands the base class a stray positional argument,
    # which Keras versions with a keyword-only base initializer reject.
    super().__init__()

    # character embedding layer (the model works on character ids)
    self.embedding = tf.keras.layers.Embedding(vocabularySize, embeddingDimensions)

    # GRU layer; returns the output for every position plus its final state
    self.gru = tf.keras.layers.GRU(rnnUnits, return_sequences=True, return_state=True)

    # dense output layer: one unit per vocabulary entry
    self.dense = tf.keras.layers.Dense(vocabularySize)

  def call(self, inputs, states=None, return_state=False, training=False):
    """Run a batch of id sequences through the model.

    Args:
      inputs: token ids to feed to the embedding layer.
      states: GRU state to start from; when None the GRU's initial state
        is used.
      return_state: when True, also return the GRU state after the run.
      training: forwarded to every layer.

    Returns:
      The dense layer's output, or the tuple (output, states) when
      `return_state` is True.
    """
    x = inputs

    # run it through the embedding layer
    x = self.embedding(x, training=training)

    # create states if there are none yet
    if states is None:
      states = self.gru.get_initial_state(x)

    # run it through the GRU layer, continuing from the given state
    x, states = self.gru(x, initial_state=states, training=training)

    # run it through the dense layer
    x = self.dense(x, training=training)

    # return the outputs together with the GRU state, so the caller can
    # continue from this state on the next call
    if return_state:
      return x, states

    # return only the outputs
    else:
      return x

# Build the model. The vocabulary size is taken from the `StringLookup` layer
# so that it is sure to match the ids that layer produces.
model = TextGenerationModel(
    len(convertCharactersIntoIds.get_vocabulary()),  # vocabularySize
    embeddingDimensions,
    rnnUnits,
)

In [13]:
# Checkpointing: only the weights are saved; the file name carries an
# `{epoch}` placeholder so progress from different epochs is kept apart.
checkpointPrefix = os.path.join('./training_checkpoints', "ckpt_{epoch}")
checkpointCallback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpointPrefix,
    save_weights_only=True,
)

# The model outputs raw logits, hence from_logits=True
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

# Compile with the Adam optimizer
model.compile(optimizer='adam', loss=loss)

# Train for 20 epochs, saving a checkpoint via the callback
history = model.fit(dataset, epochs=20, callbacks=[checkpointCallback])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [14]:
class GenerateNextCharacter(tf.keras.Model):
  """Wraps a trained model so that text can be sampled one character at a time."""

  def __init__(self, model, convertIdsIntoCharacters, convertCharactersIntoIds, temperature=1.0):
    """Store the model and lookup layers and build the "[UNK]" mask.

    Args:
      model: the model to sample from; it is called with `inputs`, `states`
        and `return_state=True`.
      convertIdsIntoCharacters: layer mapping ids back to characters.
      convertCharactersIntoIds: layer mapping characters to ids.
      temperature: divisor applied to the logits before sampling.
    """
    super().__init__()
    self.temperature = temperature
    self.model = model
    self.convertIdsIntoCharacters = convertIdsIntoCharacters
    self.convertCharactersIntoIds = convertCharactersIntoIds

    # Build a vector as long as the vocabulary that is -inf at the id of
    # "[UNK]". Adding it to the logits prevents "[UNK]" from ever being sampled.
    unk_ids = self.convertCharactersIntoIds(['[UNK]'])[:, None]
    vocabulary_length = len(convertCharactersIntoIds.get_vocabulary())
    unk_penalties = tf.SparseTensor(
        indices=unk_ids,
        values=[-float('inf')] * len(unk_ids),
        dense_shape=[vocabulary_length])
    self.prediction_mask = tf.sparse.to_dense(unk_penalties)

  @tf.function
  def generate_one_step(self, inputs, states=None):
    """Sample the next character for every string in `inputs`.

    Args:
      inputs: batch of strings to continue.
      states: model state from the previous step, or None for the first step.

    Returns:
      A tuple (predicted characters, new model state).
    """
    # Strings -> characters -> token ids (as a dense tensor)
    char_ids = self.convertCharactersIntoIds(
        tf.strings.unicode_split(inputs, 'UTF-8')).to_tensor()

    # Run the model; logits.shape is [batch, char, next_char_logits]
    logits, states = self.model(inputs=char_ids, states=states,
                                return_state=True)

    # Keep only the prediction for the last position, scale it by the
    # temperature, then mask out "[UNK]".
    last_logits = logits[:, -1, :] / self.temperature
    last_logits = last_logits + self.prediction_mask

    # Draw one token id per batch entry from the resulting distribution
    sampled_ids = tf.squeeze(
        tf.random.categorical(last_logits, num_samples=1), axis=-1)

    # Token ids -> characters, returned together with the model state
    return self.convertIdsIntoCharacters(sampled_ids), states

one_step_model = GenerateNextCharacter(model, convertIdsIntoCharacters, convertCharactersIntoIds)

# Start from a seed phrase with no model state
states = None
next_char = tf.constant(['The Transformer architecture '])
result = [next_char]

# Generate 10000 characters: each step feeds the previously sampled character
# and the carried-over state back into the model.
for n in range(10000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

# Concatenate the seed phrase and all sampled characters into one string
result = tf.strings.join(result)
generated_text = result[0].numpy().decode('utf-8')
print(generated_text, '\n\n' + '_' * 80)

The Transformer architecture was done in the Le-araptive member from its exactly this zero. Us important do this transition). For instance, a convinced connect with the empirical risk is still in Figure 13 gives plup, the proposed models of neuron neural learning task within the BM can do that this incommand on plain gradient ∇ℛ(θ^((n)) is the back other blocks or the same time, plefif the same paga. For instance concepts and not before eregay of the Metropolis gradient is important and don’t as a reaction dynamics will cover the same landscape over the basics of the proposal distribution given by a pdf. ]

[As become an important clear in and Equation [eEepricables of equations which certain within-thinking has too structural stability, that is one of the tradifitions to be a quadratic loss of each presentative effects of easy constant for the a learning problem, it consists of sigiofffencouse: simulated annealing — fesired e another to tom No experiment.

The HoffRNN visible brain is