In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
import os
import tensorflow as tf
import numpy as np

# Set the seed for random operations. 
# This let our experiments to be reproducible. 
SEED = 12
tf.random.set_seed(SEED)
np.random.seed(SEED)

# Get current working directory
cwd = os.getcwd()

# Set GPU memory growth
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)

# Text generation - Next character prediction
## Charles Dickens


# Dataset

In [3]:
# Prepare dataset
# ---------------

# Read full text
with open(os.path.join(cwd, 'dickens.txt'), 'r') as f:
    full_text = f.read()
f.close()

full_text_length = len(full_text)
print('Full text length:', full_text_length)

# Create vocabulary
vocabulary = sorted(list(set(full_text)))

print('Number of unique characters:', len(vocabulary))
print(vocabulary)

# Dictionaries for char-to-int/int-to-char conversion
ctoi = {c:i for i, c in enumerate(vocabulary)}
itoc = {i:c for i, c in enumerate(vocabulary)}

seq_length = 100

Full text length: 157789
Number of unique characters: 65
['\n', ' ', '!', '"', "'", '(', ')', ',', '-', '.', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'ö']


# Model

In [4]:
# Build Recurrent Neural Network
# ------------------------------

# Hidden size (state)
h_size = 128

model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(units=h_size, batch_input_shape=[None, seq_length, len(vocabulary)], 
                               return_sequences=True, stateful=False))
model.add(tf.keras.layers.LSTM(units=h_size, return_sequences=False, stateful=False))
model.add(tf.keras.layers.Dense(units=len(vocabulary), activation='softmax'))

In [5]:
model.summary()
model.weights

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 100, 128)          99328     
_________________________________________________________________
lstm_1 (LSTM)                (None, 128)               131584    
_________________________________________________________________
dense (Dense)                (None, 65)                8385      
Total params: 239,297
Trainable params: 239,297
Non-trainable params: 0
_________________________________________________________________


[<tf.Variable 'lstm/kernel:0' shape=(65, 512) dtype=float32, numpy=
 array([[-0.00464945, -0.03394137, -0.000799  , ..., -0.02225107,
         -0.06204449,  0.03887913],
        [-0.07563964,  0.09494284, -0.01195837, ...,  0.06521125,
         -0.06513311, -0.06987695],
        [-0.00498689,  0.05380745,  0.04336335, ..., -0.01257289,
          0.09079292, -0.02371988],
        ...,
        [ 0.00502372, -0.04001955, -0.01122119, ...,  0.09009796,
         -0.07304128, -0.07973754],
        [-0.00993448, -0.08406316, -0.08523064, ...,  0.0843312 ,
         -0.04517664, -0.05949436],
        [ 0.09082402, -0.08354253, -0.06263713, ...,  0.05646159,
         -0.06945679, -0.04800734]], dtype=float32)>,
 <tf.Variable 'lstm/recurrent_kernel:0' shape=(128, 512) dtype=float32, numpy=
 array([[-6.6588640e-02,  3.6324739e-02, -8.2312122e-02, ...,
          2.5029799e-02, -1.3928058e-02, -7.1377113e-02],
        [-1.1753984e-02,  2.6283264e-02, -2.6581895e-02, ...,
         -4.0398641e-03, -3.

# Compile the model

In [6]:
# Optimization params
# -------------------

# Loss
loss = tf.keras.losses.CategoricalCrossentropy()

# learning rate
lr = 1e-2
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
# -------------------

# Validation metrics
# ------------------

metrics = ['accuracy']
# ------------------

# Compile Model
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# Generate text

In [7]:
def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [8]:
# Set number of characters we want to generate
generation_length = 100

# Get random seed sequence
start_idx = np.random.randint(0, full_text_length - seq_length)

seed_sentence = full_text[start_idx:start_idx+seq_length]

print('----- Seed sequence:')
print(seed_sentence)

in_onehot = np.zeros([1, seq_length, len(vocabulary)])
for t_idx, c in enumerate(seed_sentence):
    in_onehot[:, t_idx, ctoi[c]] = 1.
    
generated_sentence = seed_sentence
    
for i in range(generation_length):
        
    preds = model.predict(in_onehot, verbose=0)[0]
    
    # Two main ways of predicting
    # dummy: argmax
    # next_char = np.argmax(preds[-1], temperature=0.5)
    # sampling
    # less the temperature more predictable is the output
    next_char = sample(preds, temperature=0.5)  # next_char is the id
    
    next_char_onehot = np.zeros([1, 1, len(vocabulary)])
    next_char_onehot[:, :, next_char] = 1.
    
    in_onehot = np.concatenate([in_onehot, next_char_onehot], axis=1)
    in_onehot = in_onehot[:, 1:, :]
    
    generated_sentence += itoc[next_char]

print('\n----- Generated Sentence')
print(generated_sentence)

print('\n----- Original Sentence')
original_sentence = full_text[start_idx:start_idx+len(generated_sentence)]
print(original_sentence)

----- Seed sequence:
 cried.

Shaving was not an easy task, for his hand continued to
shake very much; and shaving requir

----- Generated Sentence
 cried.

Shaving was not an easy task, for his hand continued to
shake very much; and shaving requirS,klDQ:zrCQ)mzp!UgUNoRk'MtBtZv'jNq!YDz?jSvrJ,lACUwPF-m,GINTxBooSöQBUyRWöVprIwncQ:CQDHx"t?iqu-puQ .U'

----- Original Sentence
 cried.

Shaving was not an easy task, for his hand continued to
shake very much; and shaving requires attention, even when
you don't dance while you are at it. But if he had cut the
end of his nose o


In [9]:
# This code is used to visualize different results at different epochs (starting, intermediate, final)
# ADAPT THIS TO YOUR CODE
for epoch in range(1, 101, 49):
    
    print('\nModel epoch:', epoch)
    print('###############')
    
    if epoch < 10:
        epoch_str = '0'+str(epoch)
    else:
        epoch_str = str(epoch)
    
    # Load Model at current epoch
    model.load_weights(os.path.join(
        cwd, 'dickens_experiments', 'exp_Dec12_22-18-56', 'ckpts', 'cp_'+epoch_str+'.ckpt'))
    
    print('\n----- Seed sequence:')
    print(seed_sentence)

    in_onehot = np.zeros([1, seq_length, len(vocabulary)])
    for t_idx, c in enumerate(seed_sentence):
        in_onehot[:, t_idx, ctoi[c]] = 1.

    generated_sentence = seed_sentence

    for i in range(generation_length):

        preds = model.predict(in_onehot, verbose=0)[0]

        # Two main ways of predicting
        # dummy: argmax
        # next_char = np.argmax(preds[-1])
        # sampling
        # less the temperature more predictable is the output
        next_char = sample(preds, temperature=0.5)  # next_char is the id

        next_char_onehot = np.zeros([1, 1, len(vocabulary)])
        next_char_onehot[:, :, next_char] = 1.

        in_onehot = np.concatenate([in_onehot, next_char_onehot], axis=1)
        in_onehot = in_onehot[:, 1:, :]

        generated_sentence += itoc[next_char]


    print('\n----- Generated Sentence')
    print(generated_sentence)


Model epoch: 1
###############


InvalidArgumentError: Unsuccessful TensorSliceReader constructor: Failed to get matching files on /Users/gleonardo/Desktop/DeepLearning/Lesson3/Notebooks/dickens_experiments/exp_Dec12_22-18-56/ckpts/cp_01.ckpt: Not found: /Users/gleonardo/Desktop/DeepLearning/Lesson3/Notebooks/dickens_experiments/exp_Dec12_22-18-56/ckpts; No such file or directory

# Visualize most probable future characters

In [10]:
import matplotlib.pyplot as plt

%matplotlib notebook

# Load Model at wanted epoch
model.load_weights(os.path.join(
    cwd, 'dickens_experiments', 'exp_Dec12_22-18-56', 'ckpts', 'cp_100.ckpt'))

# Get random slice from text of length 2*seq_length
start_idx = np.random.randint(0, full_text_length - seq_length)

text_slice = full_text[start_idx:start_idx+2*seq_length]

seed_sentence = text_slice[:seq_length]

in_onehot = np.zeros([1, seq_length, len(vocabulary)])
for t_idx, c in enumerate(seed_sentence):
    in_onehot[:, t_idx, ctoi[c]] = 1.

print('\n----- Seed sequence:')
print(seed_sentence)

next_char = text_slice[seq_length-1]

next_chars = []
next_chars.append(next_char)
probs = []

for i in range(seq_length):

    preds = model.predict(in_onehot, verbose=0)[0]
    
    ordered_preds = np.argsort(preds)[::-1]
   
    probs.append([itoc[ordered_preds[0]], itoc[ordered_preds[1]], 
          itoc[ordered_preds[2]], itoc[ordered_preds[3]], 
          itoc[ordered_preds[4]]])
          
    next_char = text_slice[seq_length+i]
    next_chars.append(next_char)
    next_char_id = ctoi[next_char]

    next_char_onehot = np.zeros([1, 1, len(vocabulary)])
    next_char_onehot[:, :, next_char_id] = 1.

    in_onehot = np.concatenate([in_onehot, next_char_onehot], axis=1)
    in_onehot = in_onehot[:, 1:, :]
    
plt.figure(figsize=(20,2))
clust_data = np.array(probs).T
collabel=next_chars
table = plt.table(cellText=clust_data,colLabels=collabel, loc='center')
plt.show()

InvalidArgumentError: Unsuccessful TensorSliceReader constructor: Failed to get matching files on /Users/gleonardo/Desktop/DeepLearning/Lesson3/Notebooks/dickens_experiments/exp_Dec12_22-18-56/ckpts/cp_100.ckpt: Not found: /Users/gleonardo/Desktop/DeepLearning/Lesson3/Notebooks/dickens_experiments/exp_Dec12_22-18-56/ckpts; No such file or directory

# Visualize LSTM hidden neurons

In [11]:
from matplotlib import cm

# Load Model at wanted epoch
model.load_weights(os.path.join(
   cwd, 'dickens_experiments', 'exp_Dec12_22-18-56', 'ckpts', 'cp_100.ckpt'))

# Create a new model to get neurons activations
model_in = model.input 
model_out = model.layers[1].output # Which recurrent layer (in this case last one)
new_model = tf.keras.Model(model_in, model_out)

# Get random slice from text of length 2*seq_length
start_idx = np.random.randint(0, full_text_length - seq_length)

text_slice = full_text[start_idx:start_idx+2*seq_length]

seed_sentence = text_slice[:seq_length]

in_onehot = np.zeros([1, seq_length, len(vocabulary)])
for t_idx, c in enumerate(seed_sentence):
    in_onehot[:, t_idx, ctoi[c]] = 1.

print('\n----- Seed sequence:')
print(seed_sentence)

next_char = text_slice[seq_length-1]

next_chars = []
next_chars.append(next_char)
neuron_values = []

which_neuron = 8 # which neuron we want to inspect

for i in range(seq_length):

    lstm_states = new_model.predict(in_onehot, verbose=0)[0]
    lstm_final_state = lstm_states
    lstm_neuron = lstm_final_state[which_neuron]
    neuron_values.append(lstm_neuron)

    next_char = text_slice[seq_length+i]
    next_chars.append(next_char)
    next_char_id = ctoi[next_char]

    next_char_onehot = np.zeros([1, 1, len(vocabulary)])
    next_char_onehot[:, :, next_char_id] = 1.

    in_onehot = np.concatenate([in_onehot, next_char_onehot], axis=1)
    in_onehot = in_onehot[:, 1:, :]
    
plt.figure(figsize=(20,2))
clust_data = np.expand_dims(np.array(neuron_values), -1).T
collabel=next_chars
norm = plt.Normalize(min(neuron_values)-1, max(neuron_values)+1)
colours = plt.cm.hot(norm(neuron_values))
viridis = cm.get_cmap('PuBu_r', 100)
table = plt.table(cellText=np.expand_dims(np.array(collabel[:-1]), -1).T,
                  cellColours=np.expand_dims(colours, 0), # np.expand_dims(neuron_values, -1).T,
                  # colLabels=collabel, 
                  # colColours=['b','g','r'], 
                  loc='center')
plt.show()

InvalidArgumentError: Unsuccessful TensorSliceReader constructor: Failed to get matching files on /Users/gleonardo/Desktop/DeepLearning/Lesson3/Notebooks/dickens_experiments/exp_Dec12_22-18-56/ckpts/cp_100.ckpt: Not found: /Users/gleonardo/Desktop/DeepLearning/Lesson3/Notebooks/dickens_experiments/exp_Dec12_22-18-56/ckpts; No such file or directory