### A Brief History of Time - Stephen Hawking

In [1]:
# Import libraries
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [8]:
# Load the book as raw bytes and decode it explicitly as UTF-8, so the result
# does not depend on the platform's default text encoding.
with open('./History of Time.txt', 'rb') as book_file:
    raw_bytes = book_file.read()
text = raw_bytes.decode('utf-8')

# Quick look at what was loaded: total character count and the opening lines.
print(len(text))
print(text[:100])

392175
A BRIEF HISTORY OF TIME

About the Book
Was there a beginning of time? Could time run backwards? 


In [9]:
# Character-level vocabulary: every distinct character of the book, sorted.
vocab = sorted(set(text))
vocab_size = len(vocab)

# Lookup tables in both directions: character -> id (dict), id -> character (array).
char2id = {ch: idx for idx, ch in enumerate(vocab)}
id2char = np.array(vocab)

# The whole text encoded as an array of integer character ids.
text_as_ip = np.array([char2id[ch] for ch in text])

print(vocab_size)
print(f"'{text[:20]}' mapped to {text_as_ip[:20]}")

86
'A BRIEF HISTORY OF T' mapped to [26  2 27 43 34 30 31  2 33 34 44 45 40 43 50  2 40 31  2 45]


In [10]:
# Hyperparameters
SEQ_LEN = 100        # characters in each input/target sequence
BATCH_SIZE = 64      # sequences per batch
embedding_dim = 256  # output width of the embedding layer
rnn_units = 1024     # units in each GRU layer

# Number of whole (SEQ_LEN + 1)-character chunks that fit in the text.
# NOTE(review): not referenced anywhere else in the visible notebook.
example_per_epoch = len(text) // (SEQ_LEN + 1)

In [11]:
# Build next-character training pairs: every window of SEQ_LEN characters is an
# input, and the same window shifted right by one character is its target.
SHUFFLE_SEED = 42  # fixed so the shuffled sample order is reproducible

num_windows = max(len(text_as_ip) - SEQ_LEN, 0)

# Shuffle the window start offsets instead of the windows themselves. Inputs and
# targets are later cut from the same gathered rows, so each pair stays aligned.
# (This replaces the former train_test_split(..., test_size=0.00001) call, which
# was only used to shuffle, silently discarded a few samples and was unseeded.)
window_starts = np.random.RandomState(SHUFFLE_SEED).permutation(num_windows)

# Row k holds the SEQ_LEN + 1 consecutive ids starting at window_starts[k];
# one vectorised gather replaces the per-window Python loop.
windows = text_as_ip[window_starts[:, None] + np.arange(SEQ_LEN + 1)[None, :]]

x_ip = windows[:, :-1]  # first SEQ_LEN ids of each window
y_op = windows[:, 1:]   # the same ids shifted by one position (next-character targets)

# NOTE(review): neighbouring windows overlap in all but one character, so any
# later random train/test split of these samples shares text between both sides.

In [12]:
# Group the input/target samples into fixed-size batches
def split_ip_target(ip, op, batch_size):
    """Group paired input/target samples into consecutive full batches.

    Samples are taken in order, `batch_size` at a time. A trailing remainder
    that cannot fill a whole batch is dropped; a final batch that is exactly
    full is kept (the previous loop bound dropped it whenever `len(ip)` was an
    exact multiple of `batch_size`).

    Args:
        ip: Array-like of input samples, one sample per row.
        op: Array-like of target samples, row-aligned with `ip`.
        batch_size: Number of samples per batch; must be positive.

    Returns:
        A `(data, label)` tuple of arrays shaped
        `(num_batches, batch_size, *sample_shape)`. The arrays may share memory
        with the inputs.

    Raises:
        ValueError: If `batch_size` is not positive, or if `op` has fewer rows
            than are needed to fill the batches taken from `ip`.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    ip = np.asarray(ip)
    op = np.asarray(op)

    num_batches = len(ip) // batch_size
    used = num_batches * batch_size  # rows that fit into whole batches

    # Reshaping the leading axis into (num_batches, batch_size) is equivalent to
    # stacking consecutive slices of `batch_size` rows, without a Python loop.
    data = ip[:used].reshape((num_batches, batch_size) + ip.shape[1:])
    label = op[:used].reshape((num_batches, batch_size) + op.shape[1:])
    return data, label

In [13]:
# Group the shuffled samples into batches of BATCH_SIZE rows each.
data, label = split_ip_target(x_ip, y_op, BATCH_SIZE)

In [14]:
# Hold out 30% of the batches for evaluation. `random_state` pins the shuffle so
# the split (and therefore the reported test loss) is reproducible across runs.
# NOTE(review): the samples are one-character-stride windows of the same text, so
# held-out batches overlap heavily with training batches; the test loss is not
# a measure of performance on unseen text.
trnx, testx, trny, testy = train_test_split(
    data, label, test_size=0.3, shuffle=True, random_state=42
)

In [15]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Assemble the character-level sequence model.

    The network is an embedding layer, two stacked stateful GRU layers that
    return the full sequence, and a dense head (256 -> dropout -> 128 ->
    `vocab_size`). The last layer has no activation, so the model emits one raw
    score per vocabulary entry at every time step.

    Args:
        vocab_size: Number of distinct character ids (embedding input size and
            width of the final layer).
        embedding_dim: Output width of the embedding layer.
        rnn_units: Number of units in each GRU layer.
        batch_size: Fixed batch size baked into the model's input shape.

    Returns:
        An uncompiled `tf.keras.Sequential` model.
    """
    layers = tf.keras.layers

    def stateful_gru():
        # Both recurrent layers share exactly the same configuration.
        return layers.GRU(
            rnn_units,
            return_sequences=True,
            stateful=True,
            recurrent_initializer='glorot_uniform',
        )

    stack = [
        layers.Embedding(vocab_size, embedding_dim, batch_input_shape=[batch_size, None]),
        stateful_gru(),
        stateful_gru(),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(128, activation='relu'),
        layers.Dense(vocab_size),
    ]
    return tf.keras.Sequential(stack)


# Training instance of the model, fixed to BATCH_SIZE sequences per call.
model = build_model(vocab_size, embedding_dim, rnn_units, BATCH_SIZE)

In [10]:
# Forward pass of the still-untrained model on one training batch, to check the output shape.
pred = model(trnx[1])
print(pred.shape)

(64, 100, 86)


In [11]:
# Print the layer-by-layer summary, then create the optimizer used by the training step.
model.summary()
optimizer = tf.keras.optimizers.Adam()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           22016     
_________________________________________________________________
gru (GRU)                    (64, None, 1024)          3938304   
_________________________________________________________________
gru_1 (GRU)                  (64, None, 1024)          6297600   
_________________________________________________________________
dense (Dense)                (64, None, 256)           262400    
_________________________________________________________________
dropout (Dropout)            (64, None, 256)           0         
_________________________________________________________________
dense_1 (Dense)              (64, None, 128)           32896     
_________________________________________________________________
dense_2 (Dense)              (64, None, 86)            1

In [12]:
# Draw one character id per time step from the untrained model's output for the
# first sequence of the batch.
drawn_ids = tf.random.categorical(pred[0], num_samples=1)
sample = tf.squeeze(drawn_ids, axis=-1).numpy()
sample

array([48, 16, 70, 62,  8, 48,  9,  6,  9,  7, 14, 53, 43, 44, 75, 37, 70,
       61, 30, 17, 72, 15, 13, 82, 54, 24, 25, 81,  7, 62, 75, 53, 26, 74,
        7, 71, 31, 24, 28, 57, 50, 75, 54, 17, 62, 27, 22,  8, 34,  9, 35,
       19, 27, 30, 55, 71, 76, 59, 58, 67, 76, 11, 56, 22, 46, 59, 72, 36,
       31, 49,  2, 79, 23, 79,  9, 16,  8, 16, 15, 47, 61, 41, 68,  3, 39,
       25, 47, 75, 80, 56, 25, 70, 33, 10, 41, 70, 85, 80, 24, 44],
      dtype=int64)

In [13]:
# Decode the sampled ids back into characters (gibberish is expected before training).
print(''.join(id2char[sample]))

W4sk,W-)-+2bRSxLsjE5u31–c=?ö+kxbAw+tF=CfYxc5kB:,I-J7BEdtyhgpy/e:UhuKFX é;é-4,43VjPq!N?Vxëe?sH.Ps−ë=S


In [14]:
def loss_fn(labels, logits):
    """Sparse categorical cross-entropy between integer labels and raw logits."""
    return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)


# Loss of the untrained model on the batch that produced `pred`, averaged over all positions.
ex_loss = loss_fn(trny[1], pred)
print(ex_loss.numpy().mean())

4.4537053


In [16]:
# NOTE(review): `@tf.function` was left disabled by the author, so this step runs eagerly.
def train_step(ip, op):
    """Run one optimisation step on a single batch and return its loss.

    Uses the notebook-level `model`, `loss_fn` and `optimizer`.

    Args:
        ip: One batch of input id sequences.
        op: The matching batch of target ids.

    Returns:
        The unreduced loss tensor that `loss_fn` produced for this batch.
    """
    with tf.GradientTape() as tape:
        # training=True is needed for the Dropout layer to actually drop units;
        # a Keras model called directly otherwise runs in inference mode.
        pred = model(ip, training=True)
        loss = loss_fn(op, pred)
    # `loss` is not reduced to a scalar; tape.gradient differentiates the sum of its elements.
    grad = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grad, model.trainable_variables))
    return loss

In [17]:
%%time
min_loss = np.inf
EPOCHS = 2
for epoch in range(EPOCHS):
    for i in range(trnx.shape[0]):
        ip = trnx[i]
        op = trny[i]
        loss = train_step(ip, op)
        if (i%100==0):
            mean_loss = loss.numpy().mean()
            if mean_loss < min_loss:
                min_loss = mean_loss
                model.save_weights("./time/ckpt")
            print(f"At epoch: {epoch+1} => Batch: {i}, Loss: {mean_loss}")

At epoch: 1 => Batch: 0, Loss: 4.453649997711182
At epoch: 1 => Batch: 100, Loss: 2.315669059753418
At epoch: 1 => Batch: 200, Loss: 1.8315255641937256
At epoch: 1 => Batch: 300, Loss: 1.4907742738723755
At epoch: 1 => Batch: 400, Loss: 1.2461707592010498
At epoch: 1 => Batch: 500, Loss: 1.1214592456817627
At epoch: 1 => Batch: 600, Loss: 1.0999459028244019
At epoch: 1 => Batch: 700, Loss: 1.0299654006958008
At epoch: 1 => Batch: 800, Loss: 0.970791757106781
At epoch: 1 => Batch: 900, Loss: 0.91861891746521
At epoch: 1 => Batch: 1000, Loss: 0.903546154499054
At epoch: 1 => Batch: 1100, Loss: 0.8604124188423157
At epoch: 1 => Batch: 1200, Loss: 0.7837527394294739
At epoch: 1 => Batch: 1300, Loss: 0.7501669526100159
At epoch: 1 => Batch: 1400, Loss: 0.7065208554267883
At epoch: 1 => Batch: 1500, Loss: 0.6665033102035522
At epoch: 1 => Batch: 1600, Loss: 0.652634859085083
At epoch: 1 => Batch: 1700, Loss: 0.581480860710144
At epoch: 1 => Batch: 1800, Loss: 0.5469928979873657
At epoch: 1 =

In [18]:
# Mean loss of each held-out batch, then the average over all of those batches.
test_loss = [
    loss_fn(batch_y, model(batch_x)).numpy().mean()
    for batch_x, batch_y in zip(testx, testy)
]

print(f"Test Loss: {np.mean(test_loss)}")

Test Loss: 0.2798250615596771


In [16]:
# Rebuild the same architecture with a batch size of 1 for text generation and
# restore the weights that the training loop saved to "./time/ckpt".
model_gen = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)
model_gen.load_weights("./time/ckpt")
model_gen.build(tf.TensorShape([1, None]))
model_gen.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (1, None, 256)            22016     
_________________________________________________________________
gru_2 (GRU)                  (1, None, 1024)           3938304   
_________________________________________________________________
gru_3 (GRU)                  (1, None, 1024)           6297600   
_________________________________________________________________
dense_3 (Dense)              (1, None, 256)            262400    
_________________________________________________________________
dropout_1 (Dropout)          (1, None, 256)            0         
_________________________________________________________________
dense_4 (Dense)              (1, None, 128)            32896     
_________________________________________________________________
dense_5 (Dense)              (1, None, 86)            

In [17]:
def gen_text(mdl, start_str, temp, num_gen):
    """Generate text one character at a time, starting from a seed string.

    The seed is fed to the model once; after that only the most recently
    sampled character is fed back in, which relies on `mdl` keeping its
    recurrent state between calls.

    Args:
        mdl: Model called as `mdl(ids)` on a `(1, length)` batch of character
            ids; it must also provide `reset_states()`.
        start_str: Non-empty seed text. Every character must be a key of the
            notebook-level `char2id` mapping.
        temp: Sampling temperature, must be > 0. The model output is divided by
            it before sampling, so larger values give more random text.
        num_gen: Number of characters to generate.

    Returns:
        `start_str` followed by the generated characters.

    Raises:
        ValueError: If `start_str` is empty or `temp` is not positive.
        KeyError: If `start_str` contains a character that is not in `char2id`.
    """
    # Validate up front: an empty seed gives the model nothing to predict from,
    # and a non-positive temperature makes the scaled logits meaningless.
    if not start_str:
        raise ValueError("start_str must contain at least one character")
    if temp <= 0:
        raise ValueError(f"temp must be positive, got {temp}")

    # Encode the seed and add a leading batch axis of size 1.
    ip_eval = tf.expand_dims([char2id[s] for s in start_str], 0)

    text_gen = []
    mdl.reset_states()  # start from a clean recurrent state
    for _ in range(num_gen):
        # Drop the batch axis and apply the temperature to the model output.
        pred = tf.squeeze(mdl(ip_eval), 0) / temp
        # Sample at every position but keep only the draw for the last time step.
        pred_id = tf.random.categorical(pred, num_samples=1)[-1, 0].numpy()
        # The sampled character becomes the next (single-step) input.
        ip_eval = tf.expand_dims([pred_id], 0)
        text_gen.append(id2char[pred_id])

    return start_str + "".join(text_gen)

In [20]:
# `temp` controls sampling randomness: the model output is divided by it before
# sampling, so higher values produce more varied (and more unfamiliar) words.
gen_text_time = gen_text(model_gen, start_str="Time", temp=0.5, num_gen=1000)
print(gen_text_time)

Time that happened to pame observe. Whatever the repulsive forces should space expanding at nearly the same rate in all different directions at a time when the density of the universe has its present value. In this case there is no unique stanowever, we still use Newton’s theory for all practical purposes because the difference between its predictions and those of general relativity is very small in the situations that we normally deal with. (Newton’s theory also has the great advantage that it left lots of room on a large scale, the microwave oven awa complete revolution, which had also to the sun, we would not be able to tell whether the leading authority on the structure of stars, predicts the laws that the universe should have begun in just this way, except as the act of a God who intended to create beings like us. In an attempt to find a model of the universe in which the collapsing phase looked like the time reverse of the expanding phase. The contracting phase will be unsuitable