<a href="https://colab.research.google.com/github/RemiCailliot/Deep_Learning_Project/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf

import numpy as np
import os
import time

In [None]:
# Read the corpus as raw bytes and decode it explicitly as UTF-8.
# The context manager closes the file handle even if reading or decoding fails.
with open('/content/drive/MyDrive/Colab files/001ssb.txt', 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
# length of text is the number of characters in it
print(f'Length of text: {len(text)} characters')

Length of text: 1628063 characters


In [None]:
# Print the first 250 characters of the corpus as a quick check that the
# file was read and decoded as expected.
print(text[:250])

A Game Of Thrones 
Book One of A Song of Ice and Fire 
By George R. R. Martin 
PROLOGUE 
"We should start back," Gared urged as the woods began to grow dark around them. "The wildlings are 
dead." 
"Do the dead frighten you?" Ser Waymar Royce a


In [None]:
# The unique characters in the file, sorted so the vocabulary order is
# the same on every run.
vocab = sorted(set(text))
print(f'{len(vocab)} unique characters')

79 unique characters


In [None]:
# Two toy strings used to demonstrate the character <-> id round trip.
example_texts = ['abcdefg', 'xyz']

# Split each string into its individual UTF-8 characters.
chars = tf.strings.unicode_split(example_texts, input_encoding='UTF-8')
chars

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>

In [None]:
# Lookup layer that maps each character of `vocab` to an integer id.
# No mask token is configured (mask_token=None).
ids_from_chars = tf.keras.layers.StringLookup(
    vocabulary=list(vocab), mask_token=None)

In [None]:
# Convert the example characters to their integer ids.
ids = ids_from_chars(chars)
ids

<tf.RaggedTensor [[53, 54, 55, 56, 57, 58, 59], [76, 77, 78]]>

In [None]:
# Inverse lookup (ids -> characters). It is built from the vocabulary reported
# by `ids_from_chars` so both layers share the same id assignment.
chars_from_ids = tf.keras.layers.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(), invert=True, mask_token=None)

In [None]:
# Map the ids back to characters to confirm the round trip.
# NOTE(review): this rebinds `chars`, which an earlier cell also assigns.
chars = chars_from_ids(ids)
chars

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>

In [None]:
# Join the characters of each row (last axis) back into a single byte string.
tf.strings.reduce_join(chars, axis=-1).numpy()

array([b'abcdefg', b'xyz'], dtype=object)

In [None]:
def text_from_ids(ids):
  """Decode `ids` to characters and join them along the last axis."""
  decoded_chars = chars_from_ids(ids)
  return tf.strings.reduce_join(decoded_chars, axis=-1)

In [None]:
# Encode the whole corpus: split it into characters, then look up each id.
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
all_ids

<tf.Tensor: shape=(1628063,), dtype=int64, numpy=array([26,  3, 32, ...,  1,  2,  1])>

In [None]:
# Dataset that yields the corpus one character id at a time.
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [None]:
# Decode and print the first ten ids, one character per line.
# The loop variable is deliberately not named `ids`: that name already holds
# the example tensor built in an earlier cell, and rebinding it here would make
# those earlier cells produce different results when re-run.
for char_id in ids_dataset.take(10):
    print(chars_from_ids(char_id).numpy().decode('utf-8'))

A
 
G
a
m
e
 
O
f
 


In [None]:
# Number of input characters per training example.
seq_length = 100
# Number of whole (seq_length + 1)-character chunks that fit in the corpus.
examples_per_epoch = len(text)//(seq_length+1)

In [None]:
# Group the id stream into chunks of seq_length+1 ids. drop_remainder=True
# discards a final chunk that is shorter than that.
sequences = ids_dataset.batch(seq_length+1, drop_remainder=True)

# Show the first chunk as individual characters.
for seq in sequences.take(1):
  print(chars_from_ids(seq))

tf.Tensor(
[b'A' b' ' b'G' b'a' b'm' b'e' b' ' b'O' b'f' b' ' b'T' b'h' b'r' b'o'
 b'n' b'e' b's' b' ' b'\r' b'\n' b'B' b'o' b'o' b'k' b' ' b'O' b'n' b'e'
 b' ' b'o' b'f' b' ' b'A' b' ' b'S' b'o' b'n' b'g' b' ' b'o' b'f' b' '
 b'I' b'c' b'e' b' ' b'a' b'n' b'd' b' ' b'F' b'i' b'r' b'e' b' ' b'\r'
 b'\n' b'B' b'y' b' ' b'G' b'e' b'o' b'r' b'g' b'e' b' ' b'R' b'.' b' '
 b'R' b'.' b' ' b'M' b'a' b'r' b't' b'i' b'n' b' ' b'\r' b'\n' b'P' b'R'
 b'O' b'L' b'O' b'G' b'U' b'E' b' ' b'\r' b'\n' b'"' b'W' b'e' b' ' b's'
 b'h' b'o' b'u'], shape=(101,), dtype=string)


In [None]:
# Show the first five chunks joined back into text.
for seq in sequences.take(5):
  print(text_from_ids(seq).numpy())

b'A Game Of Thrones \r\nBook One of A Song of Ice and Fire \r\nBy George R. R. Martin \r\nPROLOGUE \r\n"We shou'
b'ld start back," Gared urged as the woods began to grow dark around them. "The wildlings are \r\ndead." '
b'\r\n"Do the dead frighten you?" Ser Waymar Royce asked with just the hint of a smile. \r\nGared did not r'
b'ise to the bait. He was an old man, past fifty, and he had seen the lordlings come and go. \r\n"Dead is'
b' dead," he said. "We have no business with the dead." \r\n"Are they dead?" Royce asked softly. "What pr'


In [None]:
def split_input_target(sequence):
    """Return `(input, target)` for next-element prediction.

    `input` is `sequence` without its last element and `target` is
    `sequence` without its first element, so `target[i]` is the element
    that follows `input[i]`.
    """
    return sequence[:-1], sequence[1:]

In [None]:
# Quick check of the split on a plain Python list of characters.
split_input_target(list("Tensorflow"))

(['T', 'e', 'n', 's', 'o', 'r', 'f', 'l', 'o'],
 ['e', 'n', 's', 'o', 'r', 'f', 'l', 'o', 'w'])

In [None]:
# Turn every chunk into an (input, target) pair.
dataset = sequences.map(split_input_target)

In [None]:
# Show one (input, target) pair as text; the target is the input shifted
# forward by one character.
for input_example, target_example in dataset.take(1):
    print("Input :", text_from_ids(input_example).numpy())
    print("Target:", text_from_ids(target_example).numpy())

Input : b'A Game Of Thrones \r\nBook One of A Song of Ice and Fire \r\nBy George R. R. Martin \r\nPROLOGUE \r\n"We sho'
Target: b' Game Of Thrones \r\nBook One of A Song of Ice and Fire \r\nBy George R. R. Martin \r\nPROLOGUE \r\n"We shou'


In [None]:
# Batch size
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

# Build the training pipeline from `sequences` rather than from the previous
# value of `dataset`. This keeps the cell idempotent: re-running it no longer
# shuffles and batches a dataset that has already been batched.
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    # tf.data.AUTOTUNE is the stable name for tf.data.experimental.AUTOTUNE.
    .prefetch(tf.data.AUTOTUNE))

dataset

<PrefetchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>

In [None]:
# Size of the vocabulary as seen by the `StringLookup` layers. This is taken
# from the layer rather than from `len(vocab)` because the layer's vocabulary
# also contains the "[UNK]" token, so the model's input/output width must
# match the layer, not the raw character set.
vocab_size = len(ids_from_chars.get_vocabulary())

# The embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

In [None]:
class MyModel(tf.keras.Model):
  """Character-level language model: Embedding -> GRU -> Dense.

  Args:
    vocab_size: Number of distinct token ids; used as the embedding input size
      and as the width of the output layer.
    embedding_dim: Size of each embedding vector.
    rnn_units: Number of units in the GRU layer.
  """

  def __init__(self, vocab_size, embedding_dim, rnn_units):
    # `super().__init__()` already binds the instance; passing `self` again
    # would hand it to the base constructor as an extra positional argument.
    super().__init__()
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    # return_sequences/return_state: the GRU yields an output per time step
    # plus its final state, so the state can be fed back in by the caller.
    self.gru = tf.keras.layers.GRU(rnn_units,
                                   return_sequences=True,
                                   return_state=True)
    # No activation: the layer outputs raw scores over the vocabulary.
    self.dense = tf.keras.layers.Dense(vocab_size)

  def call(self, inputs, states=None, return_state=False, training=False):
    """Run the model on a batch of token ids.

    Args:
      inputs: Token ids to embed.
      states: Optional GRU state to start from. When None, the GRU's own
        initial state is used.
      return_state: When True, also return the GRU state after the last step.
      training: Forwarded to each layer.

    Returns:
      The dense layer's output, or `(output, states)` when `return_state`
      is True.
    """
    x = inputs
    x = self.embedding(x, training=training)
    if states is None:
      states = self.gru.get_initial_state(x)
    x, states = self.gru(x, initial_state=states, training=training)
    x = self.dense(x, training=training)

    if return_state:
      return x, states
    else:
      return x

In [None]:
# Instantiate the model. The vocabulary size is read from the lookup layer
# rather than from `vocab`.
model = MyModel(
    # Be sure the vocabulary size matches the `StringLookup` layers.
    vocab_size=len(ids_from_chars.get_vocabulary()),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

In [None]:
# Run a single batch through the model (before any training) to check the
# shape of its output.
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 80) # (batch_size, sequence_length, vocab_size)


In [None]:
# Print the model's layers and parameter counts.
model.summary()

Model: "my_model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     multiple                  20480     
                                                                 
 gru_1 (GRU)                 multiple                  3938304   
                                                                 
 dense_1 (Dense)             multiple                  82000     
                                                                 
Total params: 4,040,784
Trainable params: 4,040,784
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Sample one next-character id per time step from the predictions for the
# first example in the batch.
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
# Drop the trailing num_samples axis and convert to a NumPy array.
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()
sampled_indices

array([30, 11, 41, 52,  9, 22, 19, 45,  9, 74, 18, 14, 48, 25, 44, 51, 76,
       21, 39, 25, 57, 57, 74, 74, 26, 16, 49, 48, 24, 75, 63, 10, 10, 50,
       58, 47, 53, 63, 39, 61, 74, 13, 10, 57, 15, 29, 25, 40, 71, 78, 28,
       15, 37, 63, 60,  5, 22, 59, 32, 17, 61, 40, 74, 69, 43,  0, 53, 33,
       69, 37, 37, 45, 67, 56, 73, 10, 53, 27,  9, 63, 54, 15, 42, 57, 27,
       59, 79, 23, 42,  6, 47, 44, 44, 72, 13, 71, 39, 35, 64, 32])

In [None]:
# Decode the input and the sampled ids back to text. The model has not been
# trained yet at this point (training happens in a later cell).
print("Input:\n", text_from_ids(input_example_batch[0]).numpy())
print()
print("Next Char Predictions:\n", text_from_ids(sampled_indices).numpy())

Input:
 b' suddenly \r\nforgotten. "What are you doing up there? Why aren\'t you at the feast?" \r\n"Too hot, too n'

Next Char Predictions:
 b'E-P`*96T*v51W?S]x8N?eevvA3XW;wk,,YfVakNiv0,e2D?OszC2Lkh"9gG4iOvqR[UNK]aHqLLTodu,aB*kb2QeBg~:Q\'VSSt0sNJlG'


In [None]:
# Integer targets, so use the sparse categorical cross-entropy.
# from_logits=True: the model's final Dense layer has no activation.
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

In [None]:
# Loss of the not-yet-trained model on the example batch.
example_batch_mean_loss = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("Mean loss:        ", example_batch_mean_loss)

Prediction shape:  (64, 100, 80)  # (batch_size, sequence_length, vocab_size)
Mean loss:         tf.Tensor(4.3808165, shape=(), dtype=float32)


In [None]:
# Exponential of the mean loss, for comparison with the vocabulary size.
tf.exp(example_batch_mean_loss).numpy()

79.903244

In [None]:
# Configure training: Adam optimizer with the cross-entropy loss defined earlier.
model.compile(optimizer='adam', loss=loss)

In [None]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files; the `{epoch}` placeholder is left unformatted
# here and passed to the callback as part of the path template.
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# save_weights_only=True: checkpoints store the weights, not the full model.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [None]:
# Number of training epochs passed to `model.fit`.
EPOCHS = 20

In [None]:
# Train the model; the checkpoint callback is invoked during training.
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
class OneStep(tf.keras.Model):
  """Wraps a trained model to sample one next character per call.

  Args:
    model: Model called with `inputs`, `states` and `return_state=True`.
    chars_from_ids: Layer mapping token ids to characters.
    ids_from_chars: Layer mapping characters to token ids.
    temperature: Divisor applied to the logits before sampling.
  """

  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
    super().__init__()
    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    # Additive logit mask that holds -inf at the id of "[UNK]", so that
    # token can never be sampled.
    unk_ids = self.ids_from_chars(['[UNK]'])[:, None]
    vocabulary_size = len(ids_from_chars.get_vocabulary())
    unk_mask = tf.SparseTensor(
        indices=unk_ids,
        values=[-float('inf')]*len(unk_ids),
        # One entry per vocabulary id.
        dense_shape=[vocabulary_size])
    self.prediction_mask = tf.sparse.to_dense(unk_mask)

  @tf.function
  def generate_one_step(self, inputs, states=None):
    """Sample the next character for each input string.

    Args:
      inputs: Batch of strings to condition on.
      states: Model state from a previous call, or None to start fresh.

    Returns:
      `(predicted_chars, states)`: the sampled characters and the model
      state to pass to the next call.
    """
    # Strings -> characters -> token ids, as a dense tensor.
    token_ids = self.ids_from_chars(
        tf.strings.unicode_split(inputs, 'UTF-8')).to_tensor()

    # logits.shape is [batch, char, next_char_logits]
    logits, states = self.model(inputs=token_ids, states=states,
                                return_state=True)

    # Keep only the last time step, scale by the temperature, then add the
    # mask so that "[UNK]" cannot be generated.
    last_logits = logits[:, -1, :]
    last_logits = last_logits/self.temperature
    last_logits = last_logits + self.prediction_mask

    # Draw one token id per batch element and drop the sample axis.
    next_ids = tf.squeeze(
        tf.random.categorical(last_logits, num_samples=1), axis=-1)

    # Token ids -> characters, returned together with the model state.
    return self.chars_from_ids(next_ids), states

In [None]:
# Wrap the trained model for single-step generation (default temperature).
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

In [None]:
# Generate 1000 characters from the seed string 'Royce' and time the run.
start = time.time()
states = None  # no state yet for the first step
next_char = tf.constant(['Royce'])
result = [next_char]

# Feed each sampled character, together with the returned state, back in as
# the input of the next step.
for n in range(1000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

# Concatenate the seed and all generated characters into one string.
result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)

Roycelle carrying the spearms of a gate, young Jon outragged chest and flocked 
upward at his father's eyes. Dragon had led him back to Robb from the Kingslayer rocks and turned it. Ser Waymar Royce fell from his teeth, his lower back weeping her face, she shouted before he approached. 
The wolf was there beside their voices and each weight of a mountain above them. "Though what I can fear Lord Frey, you must. She yanked his seria to cragon for this and white. Catelyn was going to be a knight, armon, Catelyn thought, remembering. Sam shook his head. 
The humak of its razors were everywhere, and not even their wing. When she'd gone easher downgued, as Drogo writes. "My son was easy to keep out into the wind, and they hunted those he would be good more talk. On every light who had been using than smiles and forest and leg a field of new stupid like that. Some of him 
weak in Winterfelled rider, and in his hands were covered with bright gold, the same grey cloaks drove the procession 

In [None]:
# Same generation loop, but with a batch of five identical seed strings so
# that five continuations are produced in one run.
start = time.time()
states = None  # no state yet for the first step
next_char = tf.constant(['ROMEO:', 'ROMEO:', 'ROMEO:', 'ROMEO:', 'ROMEO:'])
result = [next_char]

for n in range(1000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

# Element-wise concatenation: one string per seed in the batch.
result = tf.strings.join(result)
end = time.time()
print(result, '\n\n' + '_'*80)
print('\nRun time:', end - start)

tf.Tensor(
[b'ROMEO: \r\nAnd anything Tyrion was too, her first voice surrounded, a sip of smoke and bowed. He would have told a Bearful lie than appectly. \r\nSer Kevan\'s hair was beside the door. "Father, don\'t think I\'d do out this toy, I promise you, no need you\'d be so kind, child, not me. All you need a day here." \r\nJomar gave a reefer from the dirt and looked at him. "My lord, will only trach or his name day," Tyrion told him as his sons settled darker stunged floor. "Greatjon Uncler on you to serve you to cherish the steps of the poppy." \r\n"The girl asked as to these two knights and fresh." \r\nSwinging approached. "You have told that?" \r\n"Can\'t you think?" \r\n"You told them, Petyr," she said, in arrors rippled onto its skun-shopped clothing of his. "Alone. Yes, yes, my lord, no." She commanded his way down and urged her carefully snorting forward. "Most lace as you find. I should have been best," she told him. "The Ansalk will outrangerated his brother Jaime as wel

In [None]:
# Export the single-step generator to the 'one_step' directory, then load it
# back to check that the exported model can be used on its own.
tf.saved_model.save(one_step_model, 'one_step')
one_step_reloaded = tf.saved_model.load('one_step')





INFO:tensorflow:Assets written to: one_step/assets


INFO:tensorflow:Assets written to: one_step/assets


In [None]:
# Generate 100 characters with the reloaded model to confirm that
# `generate_one_step` is available after the save/load round trip.
states = None
next_char = tf.constant(['ROMEO:'])
result = [next_char]

for n in range(100):
  next_char, states = one_step_reloaded.generate_one_step(next_char, states=states)
  result.append(next_char)

print(tf.strings.join(result)[0].numpy().decode("utf-8"))

ROMEO: Her brother was miles from Mob', half-meanund it kings were all wise, Catelyn, Tyrion says we shoul
