In [None]:
# Make IPython display the value of every top-level expression in a cell,
# not only the value of the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [None]:
import os
import tensorflow as tf
import numpy as np

# Reproducibility
# ---------------
# Seed the global random generators of both NumPy and TensorFlow
# so that repeated runs of the notebook give the same results.
SEED = 1234
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Current working directory, used as the base path for the dataset file
cwd = os.getcwd()

# GPU configuration
# -----------------
# Turn on memory growth for every physical GPU that is visible
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    # The memory growth setting has to be the same on all GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    num_physical, num_logical = len(gpus), len(logical_gpus)
    print(num_physical, "Physical GPUs,", num_logical, "Logical GPUs")
  except RuntimeError as err:
    # Memory growth can only be set before the GPUs have been initialized
    print(err)

# Text generation - Next character prediction
## Charles Dickens


# Dataset

In [None]:
# Prepare dataset
# ---------------

# Read the full text. The `with` block closes the file on exit, so no
# explicit close() call is needed afterwards.
# NOTE(review): no encoding is passed, so the platform default is used --
# confirm the encoding of dickens.txt and pass it explicitly.
with open(os.path.join(cwd, 'dickens.txt'), 'r') as f:
    full_text = f.read()

full_text_length = len(full_text)
print('Full text length:', full_text_length)

# Create vocabulary: every distinct character of the text, sorted so that
# the character <-> index mapping below is the same on every run
vocabulary = sorted(set(full_text))

print('Number of unique characters:', len(vocabulary))
print(vocabulary)

# Dictionaries for char-to-int/int-to-char conversion
ctoi = {c: i for i, c in enumerate(vocabulary)}
itoc = dict(enumerate(vocabulary))

# Create input-target pairs
# e.g., given the input sequence 'Hell', predict the next character 'o'.
# Thus, every window of seq_length consecutive characters of the full text
# is an input x, and the character right after the window is its target.

# Number of characters to be considered for the prediction
seq_length = 100

# Distance (in characters) between the starts of two consecutive windows
step = 1

# Encode the whole text once, instead of re-encoding each character for
# every one of the windows that contain it
encoded_text = np.array([ctoi[c] for c in full_text])

# Start index of every window that still has a target character after it
window_starts = np.arange(0, full_text_length - seq_length, step)

# X[k] contains the encoded characters of window k: row k of the index matrix
# is window_starts[k] + [0, 1, ..., seq_length - 1]
X = encoded_text[window_starts[:, None] + np.arange(seq_length)]
# Y[k] is the encoded character that follows window k
Y = encoded_text[window_starts + seq_length]

print('Number of sequences in the dataset:', len(X))

# Shuffle Dataset
# The same random permutation is applied to X and Y, so every sequence
# stays aligned with its target
indices = np.arange(len(X))
np.random.shuffle(indices)

X = X[indices]
Y = Y[indices]

# Divide into training and validation sets
# e.g., ~90% for training and ~10% for validation
# NOTE(review): the windows overlap, so a random split puts nearly identical
# sequences in both sets and validation metrics are presumably optimistic --
# consider splitting the text before creating the windows.
num_train = int(0.9*len(X))

# X and Y are already NumPy arrays, so their slices are arrays as well and
# no further np.array() conversion (which would copy the data) is needed
x_train, x_valid = X[:num_train], X[num_train:]
y_train, y_valid = Y[:num_train], Y[num_train:]

In [None]:
# Create data loaders
# -------------------

# Number of sequences per batch
bs = 256

# Character encoding. There are many ways to do it; one-hot encoding is used here.
def char_encode(x_, y_):
    """One-hot encode character indices and the index of their target.

    The depth of the encoding is the size of the vocabulary, i.e. every
    index is turned into a vector with len(vocabulary) entries.

    Args:
        x_: character indices of the input sequence.
        y_: character index of the target.

    Returns:
        Tuple (one-hot encoding of x_, one-hot encoding of y_).
    """
    depth = len(vocabulary)
    encoded_x = tf.one_hot(x_, depth)
    encoded_y = tf.one_hot(y_, depth)
    return encoded_x, encoded_y

# After encoding and batching, the input x matches the input shape of the
# recurrent layer -> (bs, seq_length, input_size)

# Training: shuffle -> one-hot encode -> batch -> repeat indefinitely
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(buffer_size=x_train.shape[0])
    .map(char_encode)
    .batch(bs)
    .repeat()
)

# Validation: same steps as for training, without the shuffling
valid_dataset = (
    tf.data.Dataset.from_tensor_slices((x_valid, y_valid))
    .map(char_encode)
    .batch(bs)
    .repeat()
)

# Model

In [None]:
# Build Recurrent Neural Network
# ------------------------------

# Size of the hidden state of each LSTM layer
h_size = 128

# Two stacked LSTM layers, each followed by dropout, and a final softmax
# layer with one output per character of the vocabulary
model = tf.keras.Sequential([
    # The first LSTM returns its output at every time step, because the
    # LSTM that follows takes a whole sequence as input
    tf.keras.layers.LSTM(units=h_size,
                         batch_input_shape=[None, seq_length, len(vocabulary)],
                         return_sequences=True,
                         stateful=False),
    tf.keras.layers.Dropout(0.4),
    # The second LSTM returns only the output of the last time step
    tf.keras.layers.LSTM(units=h_size, return_sequences=False, stateful=False),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units=len(vocabulary), activation='softmax'),
])

In [None]:
# Print the layer-by-layer summary of the model
model.summary()
# Display the weight variables of the model
# NOTE(review): this puts the full weight tensors in the cell output --
# consider showing only their names and shapes.
model.weights

# Prepare model for training

In [None]:
# Optimization params
# -------------------

# Loss: categorical cross-entropy, matching the one-hot encoded targets
# produced by char_encode
loss = tf.keras.losses.CategoricalCrossentropy()

# Optimizer: Adam with learning rate lr
lr = 1e-2
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

# Metrics computed in addition to the loss
metrics = ['accuracy']

# Compile Model
model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

In [None]:
import os
from datetime import datetime

cwd = os.getcwd()

# Root folder that collects all the experiments
exps_dir = os.path.join(cwd, 'dickens_experiments')

# Timestamp that makes the folder of this run unique (e.g. Jan01_12-00-00)
now = datetime.now().strftime('%b%d_%H-%M-%S')

exp_name = 'exp'

# Folder of this run. makedirs also creates exps_dir when it is missing, and
# exist_ok=True replaces the separate "exists?" check (no error if the
# folder is already there)
exp_dir = os.path.join(exps_dir, exp_name + '_' + str(now))
os.makedirs(exp_dir, exist_ok=True)
    
# Callbacks passed to model.fit
callbacks = []

# Model checkpoint
# ----------------
ckpt_dir = os.path.join(exp_dir, 'ckpts')
os.makedirs(ckpt_dir, exist_ok=True)

# The epoch number is part of the file name, so every saved checkpoint
# goes to its own file
ckpt_callback = tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(ckpt_dir, 'cp_{epoch:02d}.ckpt'),
                                                   save_weights_only=True)  # False to save the model directly
callbacks.append(ckpt_callback)

# Visualize Learning on Tensorboard
# ---------------------------------
tb_dir = os.path.join(exp_dir, 'tb_logs')
os.makedirs(tb_dir, exist_ok=True)

# By default shows losses and metrics for both training and validation
tb_callback = tf.keras.callbacks.TensorBoard(log_dir=tb_dir,
                                             profile_batch=0,
                                             histogram_freq=1)  # if 1 shows weights histograms
callbacks.append(tb_callback)

# Early Stopping
# --------------
# Set early_stop to True to stop the training when the validation loss
# has not improved for `patience` epochs
early_stop = False
if early_stop:
    # The callback lives in tf.keras.callbacks (plural)
    es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
    callbacks.append(es_callback)

# Number of batches needed to go through each set once
train_steps = int(np.ceil(x_train.shape[0] / bs))
valid_steps = int(np.ceil(x_valid.shape[0] / bs))

# Both datasets repeat indefinitely, so the number of steps that make up
# one epoch (and one validation pass) is given explicitly
model.fit(x=train_dataset,
          epochs=100,
          steps_per_epoch=train_steps,
          validation_data=valid_dataset,
          validation_steps=valid_steps,
          callbacks=callbacks)

# How to visualize training with TensorBoard

# 1. tensorboard --logdir EXPERIMENTS_DIR --port PORT     <- from terminal
# 2. localhost:PORT   <- in your browser