In [2]:
import numpy as np

In [3]:
# Seed NumPy's global random generator so the random draws in this notebook are repeatable.
np.random.seed(42)

In [4]:
# Toy operands drawn from a standard normal distribution.
# They all come from NumPy's global generator, so the order of these four
# calls determines which values each array receives after seeding.
w_hh = np.random.standard_normal((3,2))      # shape (3, 2)
w_hx = np.random.standard_normal((3,3))      # shape (3, 3)
h_t_prev = np.random.standard_normal((2,1))  # shape (2, 1) column vector
x_t = np.random.standard_normal((3,1))       # shape (3, 1) column vector

In [5]:
# Place the two matrices side by side (3 x 5) and the two column vectors on
# top of each other (5 x 1), so a single product stack_1 @ stack_2 equals
# w_hh @ h_t_prev + w_hx @ x_t.
stack_1 = np.concatenate((w_hh, w_hx), axis=1)
stack_2 = np.concatenate((h_t_prev, x_t), axis=0)

In [6]:
# The first two prints compute the same product (stacking inline vs. using
# the precomputed stack_1 / stack_2); the last two show that vstack and
# concatenate along axis 0 build the same stacked column vector.
print(np.hstack((w_hh, w_hx)) @ np.vstack((h_t_prev, x_t)))
print(stack_1 @ stack_2)
print(stack_2)
print(np.concatenate((h_t_prev, x_t), axis=0))

[[ 0.32319683]
 [-0.6577149 ]
 [ 4.61825108]]
[[ 0.32319683]
 [-0.6577149 ]
 [ 4.61825108]]
[[-0.56228753]
 [-1.01283112]
 [ 0.31424733]
 [-0.90802408]
 [-1.4123037 ]]
[[-0.56228753]
 [-1.01283112]
 [ 0.31424733]
 [-0.90802408]
 [-1.4123037 ]]


In [7]:
import numpy as np
from numpy import random
from time import perf_counter
import tensorflow as tf
from tensorflow import keras 
 

In [8]:
def sigmoid(x):
    """Numerically stable logistic function ``1 / (1 + exp(-x))``.

    The textbook formula evaluates ``exp(-x)``, which overflows (emitting a
    RuntimeWarning) once ``x`` is a large negative number. Here the exponent
    is always non-positive, so the intermediate value stays within (0, 1]:

    * ``x >= 0``: ``1 / (1 + exp(-x))``
    * ``x <  0``: ``exp(x) / (1 + exp(x))``  (the same value, rearranged)

    Args:
        x: A real scalar or array-like.

    Returns:
        The element-wise sigmoid, with the same shape as ``x``. A scalar
        input yields a NumPy scalar.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # exp of a non-positive number: cannot overflow
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return result[()]  # unwraps 0-d results to a scalar; no-op for real arrays

In [9]:
# `random` is numpy.random here (imported with `from numpy import random`).
# Every array below is drawn from that one seeded generator, so the order of
# the draws fixes the values; do not reorder these statements.
random.seed(10)                 # Random seed, so your results match ours
emb = 128                       # Embedding size
T = 256                         # Length of sequence
h_dim = 16                      # Hidden state dimension
h_0 = np.zeros((h_dim, 1))      # Initial hidden state: all-zero column of shape (h_dim, 1)

# Weight matrices of shape (h_dim, emb + h_dim): each one multiplies a hidden
# state stacked on top of an input vector.
w1 = random.standard_normal((h_dim, emb + h_dim))
w2 = random.standard_normal((h_dim, emb + h_dim))
w3 = random.standard_normal((h_dim, emb + h_dim))

# Bias columns of shape (h_dim, 1).
b1 = random.standard_normal((h_dim, 1))
b2 = random.standard_normal((h_dim, 1))
b3 = random.standard_normal((h_dim, 1))

# Input sequence: T column vectors of shape (emb, 1).
X = random.standard_normal((T, emb, 1))

# Weight lists in the positional order each forward function unpacks them.
# The GRU list holds copies of w1 / b1, so it does not share those two arrays
# with the vanilla list.
weights_vanilla = [w1, b1]
weights_GRU = [w1.copy(), w2, w3, b1.copy(), b2, b3]

In [10]:
def forward_RNN(inputs, weights):
    """Run a single step of the vanilla RNN cell.

    Args:
        inputs: Pair ``(x, ht)`` holding the current input and the previous
            hidden state.
        weights: Pair ``(wh, bh)`` holding the matrix applied to the stacked
            vector ``[ht; x]`` and the bias added to that product.

    Returns:
        ``(y, ht)``: the step output and the new hidden state. Both are the
        same array.
    """
    x_t, h_prev = inputs
    w_h, b_h = weights

    stacked = np.vstack([h_prev, x_t])
    h_next = sigmoid(w_h @ stacked + b_h)

    # The cell's output is simply its new hidden state.
    return h_next, h_next




In [11]:
def forward_GRU_RNN(inputs, weights):
    """Run a single step of the GRU cell.

    Args:
        inputs: Pair ``(x, ht)`` holding the current input and the previous
            hidden state.
        weights: Six items in the order ``(wu, wr, wh, bu, br, bh)``: the
            weight matrices for the two sigmoid gates and the tanh candidate,
            followed by their biases.

    Returns:
        ``(y, ht)``: the step output and the new hidden state. Both are the
        same array.
    """
    x_t, h_prev = inputs
    w_u, w_r, w_h, b_u, b_r, b_h = weights

    # Both gates read the same stacked vector [h_prev; x_t].
    gate_input = np.vstack((h_prev, x_t))
    reset = sigmoid(w_r @ gate_input + b_r)
    update = sigmoid(w_u @ gate_input + b_u)

    # The candidate sees the previous state scaled element-wise by `reset`.
    candidate = np.tanh(w_h @ np.concatenate([reset * h_prev, x_t]) + b_h)

    # Blend the candidate and the previous state, weighted by `update`.
    h_next = update * candidate + (1 - update) * h_prev
    return h_next, h_next


In [12]:
# Smoke test: one GRU step on the sequence element X[1], starting from the all-zero state h_0.
# The printed pair is (y, ht); both entries hold the same values.
print(forward_GRU_RNN([X[1], h_0], weights_GRU))

(array([[ 9.77779014e-01],
       [-9.97986240e-01],
       [-5.19958083e-01],
       [-9.99999886e-01],
       [-9.99707004e-01],
       [-3.02197037e-04],
       [-9.58733503e-01],
       [ 2.10804828e-02],
       [ 9.77365398e-05],
       [ 9.99833090e-01],
       [ 1.63200940e-08],
       [ 8.51874303e-01],
       [ 5.21399924e-02],
       [ 2.15495959e-02],
       [ 9.99878828e-01],
       [ 9.77165472e-01]]), array([[ 9.77779014e-01],
       [-9.97986240e-01],
       [-5.19958083e-01],
       [-9.99999886e-01],
       [-9.99707004e-01],
       [-3.02197037e-04],
       [-9.58733503e-01],
       [ 2.10804828e-02],
       [ 9.77365398e-05],
       [ 9.99833090e-01],
       [ 1.63200940e-08],
       [ 8.51874303e-01],
       [ 5.21399924e-02],
       [ 2.15495959e-02],
       [ 9.99878828e-01],
       [ 9.77165472e-01]]))


In [13]:
def scan(function, elems, weights, initializer=h_0):
    """Apply a step function across a sequence while threading a state through it.

    Args:
        function: Callable invoked as ``function([elem, state], weights)``
            that returns an ``(output, new_state)`` pair.
        elems: Iterable of per-step inputs.
        weights: Passed unchanged to ``function`` at every step.
        initializer: State given to the first step. The default is the
            module-level ``h_0`` as it was when this function was defined.

    Returns:
        ``(outputs, final_state)``: the list of per-step outputs and the
        state returned by the last step (``initializer`` itself when
        ``elems`` is empty).
    """
    state = initializer
    outputs = []
    for elem in elems:
        step_output, state = function([elem, state], weights)
        outputs.append(step_output)
    return outputs, state

In [14]:
# Size of X along its first axis, i.e. how many steps scan() will iterate over.
print(len(X))

256


In [15]:
# Run the vanilla RNN cell over the whole sequence X, starting from h_0.
ys, h_T = scan(forward_RNN, X, weights_vanilla, h_0)

# Sanity-check the result: one output per element of X, each shaped like the hidden state.
print(f"Length of ys: {len(ys)}")
print(f"Shape of each y within ys: {ys[0].shape}")
print(f"Shape of h_T: {h_T.shape}")

Length of ys: 256
Shape of each y within ys: (16, 1)
Shape of h_T: (16, 1)


In [16]:
# Time one full forward pass of the vanilla RNN over X.
# The perf_counter() difference is multiplied by 1000 and reported as milliseconds.
tic = perf_counter()
ys, h_T = scan(forward_RNN, X, weights_vanilla, h_0)
toc = perf_counter()
RNN_time=(toc-tic)*1000
print (f"It took {RNN_time:.2f}ms to run the forward method for the vanilla RNN.")

It took 6.16ms to run the forward method for the vanilla RNN.


In [17]:
# Time one full forward pass of the GRU over the same X and initial state.
# The perf_counter() difference is multiplied by 1000 and reported as milliseconds.
tic = perf_counter()
ys, h_T = scan(forward_GRU_RNN, X, weights_GRU, h_0)
toc = perf_counter()
GRU_time=(toc-tic)*1000
print (f"It took {GRU_time:.2f}ms to run the forward method for the GRU.")

It took 8.28ms to run the forward method for the GRU.


In [18]:
# Three stacked GRU layers (256, 128 and 64 units) followed by a 10-unit Dense layer.
# The first two GRUs set return_sequences=True; the third does not pass
# return_sequences, and its name says it returns the last output only.
model_GRU = tf.keras.Sequential([
    tf.keras.layers.GRU(256, return_sequences=True, name='GRU_1_returns_seq'),
    tf.keras.layers.GRU(128, return_sequences=True, name='GRU_2_returns_seq'),
    tf.keras.layers.GRU(64, name='GRU_3_returns_last_only'),
    tf.keras.layers.Dense(10)
])

In [19]:
# Ask for a summary before the model has been called on any data.
# Any exception raised here is printed instead of propagated, so the rest of
# the notebook keeps running.
try:
    model_GRU.summary()
except Exception as e:
    print(e)

In [20]:
# Remember these three numbers and follow them further through the notebook
batch_size = 60
sequence_length = 50
word_vector_length = 40

# Random input of shape (batch_size, sequence_length, word_vector_length).
input_data = tf.random.normal([batch_size, sequence_length, word_vector_length])

# Call the model once on concrete data, then print its summary.
prediction = model_GRU(input_data)

model_GRU.summary()

In [21]:
import os


In [22]:
dirname = 'data/'
filename = 'shakespeare_data.txt'
lines = [] # storing all the lines in a variable. 

counter = 0  # not used by the visible cells; kept so the notebook's globals are unchanged

# Read the corpus line by line, keeping only non-blank lines with the
# surrounding whitespace removed. The encoding is given explicitly so the text
# is decoded the same way on every platform (open() otherwise falls back to
# the locale's default encoding); UTF-8 matches the encoding used later when
# the text is split into characters.
with open(os.path.join(dirname, filename), encoding='utf-8') as files:
    for line in files:
        pure_line = line.strip()
        if pure_line:
            lines.append(pure_line)

n_lines = len(lines)
print(f"Number of lines: {n_lines}")
# Show a short excerpt of the corpus as a sanity check.
print("\n".join(lines[506:514]))

Number of lines: 125097
BENVOLIO	Here were the servants of your adversary,
And yours, close fighting ere I did approach:
I drew to part them: in the instant came
The fiery Tybalt, with his sword prepared,
Which, as he breathed defiance to my ears,
He swung about his head and cut the winds,
Who nothing hurt withal hiss'd him in scorn:
While we were interchanging thrusts and blows,


In [23]:
def build_vocabulary(lines):
    """Build the character vocabulary for a list of text lines.

    The lines are joined with newlines, so "\n" is part of the vocabulary
    whenever there is more than one line.

    Args:
        lines: Iterable of strings.

    Returns:
        A list starting with the two special entries "[UNK]" and "",
        followed by every distinct character of the joined text in sorted
        order.
    """
    unique_chars = set("\n".join(lines))
    special_tokens = ["[UNK]", ""]
    return special_tokens + sorted(unique_chars)
# Build the vocabulary from the loaded corpus, then show its size and its entries.
vocab = build_vocabulary(lines)
print(len(vocab))
print(" ".join(vocab))


82
[UNK]  	 
   ! $ & ' ( ) , - . 0 1 2 3 4 5 6 7 8 9 : ; ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ ] a b c d e f g h i j k l m n o p q r s t u v w x y z |


In [24]:
def convert_text_to_tensor(text, vocab):
    """Turn a string into a tensor holding one integer ID per character.

    Args:
        text: The string (or string tensor) to encode; it is split into
            UTF-8 characters.
        vocab: Vocabulary entries handed to a freshly built
            ``StringLookup`` layer (created with ``mask_token=None``).

    Returns:
        The tensor of IDs produced by the lookup layer for those characters.
    """
    char_to_id = tf.keras.layers.StringLookup(vocabulary=list(vocab), mask_token=None)
    characters = tf.strings.unicode_split(text, input_encoding='UTF-8')
    return char_to_id(characters)
# Encode a short sample string; `tmp` is reused below for the decode round trip.
tmp = convert_text_to_tensor("abc xyz", vocab)
print(tmp)

tf.Tensor([55 56 57  4 78 79 80], shape=(7,), dtype=int64)


In [25]:
def convert_tensor_to_text(tensor, vocab):
    """Turn a tensor of character IDs back into text.

    Args:
        tensor: Integer IDs, as produced by ``convert_text_to_tensor``.
        vocab: Vocabulary entries handed to a freshly built inverted
            ``StringLookup`` layer (``invert=True``, ``mask_token=None``).

    Returns:
        The characters joined along the last axis, converted with
        ``.numpy()``. For a 1-D ID tensor this prints as a bytes literal
        (e.g. ``b'abc xyz'``).
    """
    id_to_char = tf.keras.layers.StringLookup(vocabulary=list(vocab), mask_token=None, invert=True)
    characters = id_to_char(tensor)
    joined = tf.strings.reduce_join(characters, axis=-1)
    return joined.numpy()
# Round trip: decoding the IDs of "abc xyz" should print the same text back.
print(convert_tensor_to_text(tmp, vocab))


b'abc xyz'


In [26]:
# Hold out the last 1000 corpus lines for evaluation; everything before them is training data.
train_lines = lines[:-1000]
eval_lines = lines[-1000:]

In [27]:
def test_train_split(sequence):
    return sequence[:-1], sequence[1:]
# Demo on a list of characters: the second list is the first shifted by one position.
print(test_train_split(list("Tensorflow")))

(['T', 'e', 'n', 's', 'o', 'r', 'f', 'l', 'o'], ['e', 'n', 's', 'o', 'r', 'f', 'l', 'o', 'w'])


In [28]:
def generate_dataset(vocab, lines, seq_length=100, batch_size=64, BUFFER_SIZE = 10000):
    """Build a shuffled, batched dataset of (input, target) ID sequences.

    Args:
        vocab: Vocabulary passed to ``convert_text_to_tensor``.
        lines: Text lines; they are joined with newlines into one string.
        seq_length: Length of each input/target sequence. The ID stream is
            cut into chunks of ``seq_length + 1`` IDs, and each chunk is
            split by ``test_train_split``.
        batch_size: Number of (input, target) pairs per batch.
        BUFFER_SIZE: Buffer size handed to ``Dataset.shuffle``.

    Returns:
        A ``tf.data.Dataset`` of batched (input, target) pairs. Incomplete
        chunks and incomplete batches are dropped (``drop_remainder=True``).
    """
    corpus = "\n".join(lines)
    all_ids = convert_text_to_tensor(corpus, vocab)

    # One extra ID per chunk, because inputs and targets are the same chunk
    # offset by one position.
    chunk_len = seq_length + 1
    chunks = tf.data.Dataset.from_tensor_slices(all_ids).batch(chunk_len, drop_remainder=True)

    pairs = chunks.map(test_train_split)
    shuffled = pairs.shuffle(BUFFER_SIZE)
    batched = shuffled.batch(batch_size, drop_remainder=True)
    return batched.prefetch(tf.data.experimental.AUTOTUNE)


In [29]:
# Batch size for training. It is passed explicitly so that changing this
# constant actually changes the dataset; previously it was defined but never
# used, and the batch size came from generate_dataset's default.
BATCH_SIZE = 64
dataset = generate_dataset(vocab, train_lines, batch_size=BATCH_SIZE)

In [30]:
def create_gru_model(vocab_size, embedding_dim, rnn_units):
    """Assemble the character-level GRU language model.

    Args:
        vocab_size: Number of distinct token IDs; used as the Embedding
            input size and as the number of Dense output units.
        embedding_dim: Size of each embedding vector.
        rnn_units: Number of units in the GRU layer.

    Returns:
        A ``tf.keras.Sequential`` model: Embedding -> GRU (returning the
        full sequence) -> Dense with a log-softmax activation.
    """
    # NOTE(review): mask_zero=True makes ID 0 a masked value; confirm that is
    # intended for whatever token the vocabulary maps to ID 0.
    embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim, mask_zero=True)
    recurrent = tf.keras.layers.GRU(rnn_units, return_sequences=True)
    log_probs = tf.keras.layers.Dense(vocab_size, activation=tf.nn.log_softmax)
    return tf.keras.Sequential([embedding, recurrent, log_probs])

# Usage:
# Derive the vocabulary size from the vocabulary itself instead of repeating
# it by hand, so the Embedding and Dense layers always match the IDs the
# lookup can produce (82 for this corpus).
vocab_size = len(vocab)
embedding_dim = 256
rnn_units = 512

model = create_gru_model(vocab_size, embedding_dim, rnn_units)

In [31]:
# Build the model for inputs of shape (None, 100); 100 equals generate_dataset's
# default seq_length. Then print the layer summary.
model.build(input_shape=(None, 100))
model.summary()


In [32]:
# Take a single batch from the dataset and run the model on its first sequence only.
for input_example_batch, target_example_batch in dataset.take(1):
    print("Input: ", input_example_batch[0].numpy()) # Let's use only the first sequence in the batch
    # Wrapping the sequence in a list gives the model a batch containing one sequence.
    example_batch_predictions = model(tf.constant([input_example_batch[0].numpy()]))
    print("\n",example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

Input:  [70 65 63 68 73  4 59 68 69 77  4 55 56 69 75 74  4 79 69 75 25  4 62 59
 72 59  3 79 69 75  8 66 66  4 73 77 59 55 74  4 60 69 72  8 74 13  3 53
 37 68 69 57 65 63 68 61  4 77 63 74 62 63 68 54  3 37 68 69 57 65 11  3
 65 68 69 57 65  5  4 49 62 69  8 73  4 74 62 59 72 59 11  4 63 68  4 74
 62 59  4 69]

 (1, 100, 82) # (batch_size, sequence_length, vocab_size)


In [33]:
# Model outputs for position index 99 (the last of the 100 positions) of the first sequence.
example_batch_predictions[0][99].numpy()

array([-4.3906965, -4.391806 , -4.418195 , -4.410832 , -4.3922668,
       -4.396673 , -4.411633 , -4.4147205, -4.416295 , -4.4076157,
       -4.416847 , -4.3992734, -4.3920093, -4.397013 , -4.397345 ,
       -4.4045177, -4.417554 , -4.4087577, -4.4097695, -4.400878 ,
       -4.406688 , -4.41367  , -4.41644  , -4.4084888, -4.4333525,
       -4.384691 , -4.38563  , -4.3952007, -4.4112525, -4.3841023,
       -4.406918 , -4.398651 , -4.4046144, -4.3827014, -4.41077  ,
       -4.4254317, -4.4106174, -4.4145975, -4.4237037, -4.410116 ,
       -4.4033895, -4.438456 , -4.423766 , -4.4096932, -4.4143467,
       -4.405991 , -4.413774 , -4.4011264, -4.4183707, -4.4106135,
       -4.4102964, -4.388678 , -4.432466 , -4.4145074, -4.388995 ,
       -4.3939414, -4.411776 , -4.4152703, -4.39919  , -4.411476 ,
       -4.4035416, -4.40768  , -4.4161153, -4.395751 , -4.381609 ,
       -4.399002 , -4.421347 , -4.417153 , -4.391004 , -4.4012356,
       -4.4205   , -4.3987775, -4.4220033, -4.39702  , -4.4121

In [34]:
# For every position of the first sequence, pick the vocabulary index with the highest output value.
sampled_indices = tf.math.argmax(example_batch_predictions[0], axis=1)
print(sampled_indices.numpy())

[13 73 17 12 24 17 25 12 12 18 25 69 21 64 29 58 51 27 54 29 58 25 29 25
 47 25 58  1 54 58 44 17 17 17 17 69 25 69 18 25 49 80 47  5 18 33 80 74
  4 12 26 19 73 42 12 36 62 69 17 17 17 17 66 12 12 79 12 26 19 73 50 32
 73 12 12 19 73 60 70 29 29 29  5 69 17 17 17 58 18 25 50 25 17 12 71 17
 47 33 25 64]


In [35]:
# Decode the input IDs and the argmax predictions back to text for comparison.
print("Input:\n", convert_tensor_to_text(input_example_batch[0], vocab))
print()
print("Next Char Predictions:\n", convert_tensor_to_text(sampled_indices, vocab))

Input:
 b"pkins enow about you; here\nyou'll sweat for't.\n[Knocking within]\nKnock,\nknock! Who's there, in the o"

Next Char Predictions:
 b'.s3-:3;--4;o7jCdYA]Cd;C;U;d]dR3333o;o4;WzU!4Gzt -?5sP-Jho3333l--y-?5sXFs--5sfpCCC!o333d4;X;3-q3UG;j'


In [36]:
def compile_model(model):
    """Compile ``model`` for training and return it.

    Uses the Adam optimizer with a learning rate of 0.00125 and sparse
    categorical cross-entropy created with ``from_logits=True``.

    Args:
        model: The Keras model to compile; it is compiled in place.

    Returns:
        The same ``model`` object, now compiled.
    """
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.00125),
        loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
    )
    return model

In [37]:
# List the GPU devices TensorFlow can see; an empty list means none were found.
gpus = tf.config.list_physical_devices('GPU')
print(gpus)


[]


In [38]:
# Compile the model and train it on the dataset for `Epochs` passes.
# The captured log shows roughly three minutes per epoch in this run.
Epochs = 30
model = compile_model(model)
history = model.fit(dataset, epochs = Epochs)

Epoch 1/30
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 233ms/step - loss: 2.1975
Epoch 2/30
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m193s[0m 243ms/step - loss: 1.4679
Epoch 3/30
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m182s[0m 230ms/step - loss: 1.3776
Epoch 4/30
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m184s[0m 231ms/step - loss: 1.3365
Epoch 5/30
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m183s[0m 231ms/step - loss: 1.3115
Epoch 6/30
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m182s[0m 229ms/step - loss: 1.2928
Epoch 7/30
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m183s[0m 231ms/step - loss: 1.2807
Epoch 8/30
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m183s[0m 230ms/step - loss: 1.2691
Epoch 9/30
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m187s[0m 236ms/step - loss: 1.2590
Epoch 10/30
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [39]:
# Save the trained weights to a file in the current working directory.
model.save_weights("saved.weights.h5")

In [41]:
# Load the weights back from the file written by the save_weights cell.
model.load_weights("saved.weights.h5")

In [42]:
# Print the layer summary of the model after loading the weights.
model.summary()

In [43]:
# Take a single batch from the dataset and run the model (with the loaded weights)
# on its first sequence only.
for input_example_batch, target_example_batch in dataset.take(1):
    print("Input: ", input_example_batch[0].numpy()) # Let's use only the first sequence in the batch
    # Wrapping the sequence in a list gives the model a batch containing one sequence.
    example_batch_predictions = model(tf.constant([input_example_batch[0].numpy()]))
    print("\n",example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

Input:  [75 72  4 70 75 72 73 59  4 74 69  4 74 62 59  4 69 75 74 73 63 58 59  4
 69 60  4 62 63 73  4 62 55 68 58 11  3 55 68 58  4 68 69  4 67 69 72 59
  4 55 58 69 13  4 44 59 67 59 67 56 59 72  4  8 73 74 69 68 59 58 11  8
  4 55 68 58  4  8 60 66 55 79 59 58  4 55 66 63 76 59 13  8  3 45 62 59
 70 62 59 72]

 (1, 100, 82) # (batch_size, sequence_length, vocab_size)


In [44]:
# For every position of the first sequence, pick the vocabulary index with the highest output value.
sampled_indices = tf.math.argmax(example_batch_predictions[0], axis=1)
print(sampled_indices.numpy())

[72  4 66 66 72 70 59 11 74 62  4 74 62 59 59 57 74 74 77 63 58 59  4 69
 60  4 74 59 73  4 70 59 68 58 11  3 27 68 58  4 74 69 74 67 55 72 59  4
 74 58 69 72  3 46 59 74 59 67 56 59 72  4 74 74 57 69 68 59 58  4  4  4
 55 68 58  4 74 74 69 55 77 59 58  4 55 68 72 76 59 11  3  3 45 35 59  4
 62 59 72 58]


In [45]:
# Decode the input IDs and the argmax predictions back to text for comparison.
print("Input:\n", convert_tensor_to_text(input_example_batch[0], vocab))
print()
print("Next Char Predictions:\n", convert_tensor_to_text(sampled_indices, vocab))

Input:
 b"ur purse to the outside of his hand,\nand no more ado. Remember 'stoned,' and 'flayed alive.'\nShepher"

Next Char Predictions:
 b'r llrpe,th theecttwide of tes pend,\nAnd totmare tdor\nTetember ttconed   and ttoawed anrve,\n\nSIe herd'
