In [1]:
import numpy as np

In [2]:
np.random.seed(42)

In [3]:
w_hh = np.random.standard_normal((3,2))
w_hx = np.random.standard_normal((3,3))
h_t_prev = np.random.standard_normal((2,1))
x_t = np.random.standard_normal((3,1))

In [4]:
stack_1 = np.hstack((w_hh, w_hx))

stack_2 = np.vstack((h_t_prev, x_t))

In [5]:
print(np.matmul(np.hstack((w_hh, w_hx)), np.vstack((h_t_prev, x_t))))
print(np.matmul(stack_1,stack_2))
print(stack_2)
print(np.concatenate([h_t_prev, x_t]))

[[ 0.32319683]
 [-0.6577149 ]
 [ 4.61825108]]
[[ 0.32319683]
 [-0.6577149 ]
 [ 4.61825108]]
[[-0.56228753]
 [-1.01283112]
 [ 0.31424733]
 [-0.90802408]
 [-1.4123037 ]]
[[-0.56228753]
 [-1.01283112]
 [ 0.31424733]
 [-0.90802408]
 [-1.4123037 ]]


In [6]:
import numpy as np
from numpy import random
from time import perf_counter
import tensorflow as tf
from tensorflow import keras 
 

2024-07-16 18:57:07.102889: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-16 18:57:07.126617: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-16 18:57:07.133171: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-16 18:57:07.149998: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

In [8]:
random.seed(10)                 # Random seed, so your results match ours
emb = 128                       # Embedding size
T = 256                         # Length of sequence
h_dim = 16                      # Hidden state dimension
h_0 = np.zeros((h_dim, 1))     
 
w1 = random.standard_normal((h_dim, emb + h_dim))
w2 = random.standard_normal((h_dim, emb + h_dim))
w3 = random.standard_normal((h_dim, emb + h_dim))

b1 = random.standard_normal((h_dim, 1))
b2 = random.standard_normal((h_dim, 1))
b3 = random.standard_normal((h_dim, 1))

X = random.standard_normal((T, emb, 1))

weights_vanilla = [w1, b1]
weights_GRU = [w1.copy(), w2, w3, b1.copy(), b2, b3]

In [9]:
def forward_RNN(inputs, weights):
    x, ht = inputs
    wh, bh = weights
    
    ht = np.matmul(wh, np.vstack((ht, x)))+bh
    ht = sigmoid(ht)

    y = ht
    #print(ht)
    return y,ht 




In [10]:
def forward_GRU_RNN(inputs, weights):
    x, ht = inputs
    wu,wr,wh, bu,br,bh = weights

    r = sigmoid(np.matmul(wr, np.vstack((ht, x)))+br)
    u = sigmoid(np.matmul(wu, np.vstack((ht, x)))+bu)
    ct = np.tanh(np.matmul(wh, np.concatenate([r * ht, x]))+bh)

    #print(f'{u}\n+\n{r}\n+\n{ct}')
    
    ht = u*ct + (1-u) * ht
    y = ht

    return y,ht 


In [11]:
print(forward_GRU_RNN([X[1], h_0], weights_GRU))

(array([[ 9.77779014e-01],
       [-9.97986240e-01],
       [-5.19958083e-01],
       [-9.99999886e-01],
       [-9.99707004e-01],
       [-3.02197037e-04],
       [-9.58733503e-01],
       [ 2.10804828e-02],
       [ 9.77365398e-05],
       [ 9.99833090e-01],
       [ 1.63200940e-08],
       [ 8.51874303e-01],
       [ 5.21399924e-02],
       [ 2.15495959e-02],
       [ 9.99878828e-01],
       [ 9.77165472e-01]]), array([[ 9.77779014e-01],
       [-9.97986240e-01],
       [-5.19958083e-01],
       [-9.99999886e-01],
       [-9.99707004e-01],
       [-3.02197037e-04],
       [-9.58733503e-01],
       [ 2.10804828e-02],
       [ 9.77365398e-05],
       [ 9.99833090e-01],
       [ 1.63200940e-08],
       [ 8.51874303e-01],
       [ 5.21399924e-02],
       [ 2.15495959e-02],
       [ 9.99878828e-01],
       [ 9.77165472e-01]]))


In [12]:
def scan(function, elems, weights, initializer=h_0):
    cur_value = initializer
    ys = []
    for x in elems:
        y,cur_value = function([x,cur_value],weights)
        ys.append(y)
    return ys,cur_value

In [13]:
print(len(X))

256


In [14]:
ys, h_T = scan(forward_RNN, X, weights_vanilla, h_0)

print(f"Length of ys: {len(ys)}")
print(f"Shape of each y within ys: {ys[0].shape}")
print(f"Shape of h_T: {h_T.shape}")

Length of ys: 256
Shape of each y within ys: (16, 1)
Shape of h_T: (16, 1)


In [15]:
tic = perf_counter()
ys, h_T = scan(forward_RNN, X, weights_vanilla, h_0)
toc = perf_counter()
RNN_time=(toc-tic)*1000
print (f"It took {RNN_time:.2f}ms to run the forward method for the vanilla RNN.")

It took 9.86ms to run the forward method for the vanilla RNN.


In [16]:
tic = perf_counter()
ys, h_T = scan(forward_GRU_RNN, X, weights_GRU, h_0)
toc = perf_counter()
GRU_time=(toc-tic)*1000
print (f"It took {GRU_time:.2f}ms to run the forward method for the GRU.")

It took 15.43ms to run the forward method for the GRU.


In [17]:
model_GRU = tf.keras.Sequential([
    tf.keras.layers.GRU(256, return_sequences=True, name='GRU_1_returns_seq'),
    tf.keras.layers.GRU(128, return_sequences=True, name='GRU_2_returns_seq'),
    tf.keras.layers.GRU(64, name='GRU_3_returns_last_only'),
    tf.keras.layers.Dense(10)
])

I0000 00:00:1721181429.023349  251028 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1721181429.068600  251028 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1721181429.068810  251028 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1721181429.070333  251028 cuda_executor.cc:1015] successful NUMA node read from SysFS ha

In [18]:
try:
    model_GRU.summary()
except Exception as e:
    print(e)

In [19]:
# Remember these three numbers and follow them further through the notebook
batch_size = 60
sequence_length = 50
word_vector_length = 40

input_data = tf.random.normal([batch_size, sequence_length, word_vector_length])

prediction = model_GRU(input_data)

model_GRU.summary()

2024-07-16 18:57:09.551793: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 8907


In [20]:
import os


In [21]:
dirname = 'data/'
filename = 'shakespeare_data.txt'
lines = [] # storing all the lines in a variable. 

counter = 0

with open(os.path.join(dirname, filename)) as files:
    for line in files:        
        pure_line = line.strip()
        if pure_line:
            lines.append(pure_line)
            
n_lines = len(lines)
print(f"Number of lines: {n_lines}")
print("\n".join(lines[506:514]))

Number of lines: 125097
BENVOLIO	Here were the servants of your adversary,
And yours, close fighting ere I did approach:
I drew to part them: in the instant came
The fiery Tybalt, with his sword prepared,
Which, as he breathed defiance to my ears,
He swung about his head and cut the winds,
Who nothing hurt withal hiss'd him in scorn:
While we were interchanging thrusts and blows,


In [22]:
def build_vocabulary(lines):
    corpus = ("\n".join(lines))
    vocab = sorted(set(corpus))
    vocab.insert(0,"[UNK]") 
    vocab.insert(1,"") 
    return vocab
vocab = build_vocabulary(lines)
print(len(vocab))
print(" ".join(vocab))


82
[UNK]  	 
   ! $ & ' ( ) , - . 0 1 2 3 4 5 6 7 8 9 : ; ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ ] a b c d e f g h i j k l m n o p q r s t u v w x y z |


In [23]:
def convert_text_to_tensor(text,vocab):
    chars = tf.strings.unicode_split(text, input_encoding='UTF-8')
    return  tf.keras.layers.StringLookup(vocabulary=list(vocab), mask_token=None)(chars)
tmp = convert_text_to_tensor("abc xyz", vocab)
print(tmp)

tf.Tensor([55 56 57  4 78 79 80], shape=(7,), dtype=int64)


In [24]:
def convert_tensor_to_text(tensor, vocab):
    chars_from_ids = tf.keras.layers.StringLookup(vocabulary=list(vocab), mask_token=None, invert=True)
    return tf.strings.reduce_join(chars_from_ids(tensor), axis=-1).numpy()
print(convert_tensor_to_text(tmp, vocab))


b'abc xyz'


In [25]:
train_lines = lines[:-1000]
eval_lines = lines[-1000:]

In [26]:
def test_train_split(sequence):
    return sequence[:-1], sequence[1:]
print(test_train_split(list("Tensorflow")))

(['T', 'e', 'n', 's', 'o', 'r', 'f', 'l', 'o'], ['e', 'n', 's', 'o', 'r', 'f', 'l', 'o', 'w'])


In [27]:
def generate_dataset(vocab, lines, seq_length=100, batch_size=64, BUFFER_SIZE = 10000):

    line  = "\n".join(lines)
    all_ids = convert_text_to_tensor(line, vocab)
    ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)
    data_generator = ids_dataset.batch(seq_length+1, drop_remainder=True)
    dataset_xy = data_generator.map(test_train_split)
    dataset = (                                   
        dataset_xy                                
        .shuffle(BUFFER_SIZE)
        .batch(batch_size, drop_remainder=True)
        .prefetch(tf.data.experimental.AUTOTUNE)  
        )            
    return dataset


In [28]:
BATCH_SIZE = 64
dataset = generate_dataset(vocab, train_lines)

In [29]:
def create_gru_model(vocab_size, embedding_dim, rnn_units):
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, embedding_dim, mask_zero=True),
        tf.keras.layers.GRU(rnn_units, return_sequences=True),
        tf.keras.layers.Dense(vocab_size, activation=tf.nn.log_softmax)
    ])
    return model

# Usage:
vocab_size = 82  # Adjust as needed
embedding_dim = 256
rnn_units = 1024

model = create_gru_model(vocab_size, embedding_dim, rnn_units)

In [30]:
model.build(input_shape=(None, 100))
model.summary()


In [31]:
for input_example_batch, target_example_batch in dataset.take(1):
    print("Input: ", input_example_batch[0].numpy()) # Lets use only the first sequence on the batch
    example_batch_predictions = model(tf.constant([input_example_batch[0].numpy()]))
    print("\n",example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

Input:  [ 3 39 55 73 65 73  4 60 69 72  4 60 55 57 59 73  4 55 68 58  4 60 69 72
  4 68 69 73 59 73 25  3 28 75 61 66 59  4 56 72 55 57 59 66 59 74 11  4
 68 59 57 65 66 55 57 59  4 55 67 56 59 72 11  3 42 59 72 60 75 67 59  4
 60 69 72  4 55  4 66 55 58 79  8 73  4 57 62 55 67 56 59 72 25  3 33 69
 66 58 59 68]

 (1, 100, 82) # (batch_size, sequence_length, vocab_size)


2024-07-16 18:57:15.190813: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [32]:
example_batch_predictions[0][99].numpy()

array([-4.4130692, -4.416311 , -4.4016833, -4.410658 , -4.4067287,
       -4.3992157, -4.4210277, -4.407576 , -4.4139214, -4.396899 ,
       -4.396371 , -4.4084687, -4.407805 , -4.423681 , -4.416815 ,
       -4.419162 , -4.4033084, -4.405543 , -4.393971 , -4.4030976,
       -4.4075236, -4.4005265, -4.412947 , -4.4052925, -4.406697 ,
       -4.390149 , -4.4055448, -4.4109864, -4.397797 , -4.4068136,
       -4.384658 , -4.420477 , -4.418874 , -4.414543 , -4.419545 ,
       -4.4037094, -4.4163322, -4.4001765, -4.410797 , -4.411979 ,
       -4.4173183, -4.4164767, -4.3829775, -4.399707 , -4.4190474,
       -4.4072313, -4.413683 , -4.392843 , -4.414936 , -4.405369 ,
       -4.3996434, -4.3936777, -4.404102 , -4.3984213, -4.3938465,
       -4.406202 , -4.4059267, -4.4067974, -4.3892555, -4.393785 ,
       -4.396074 , -4.4068847, -4.4093714, -4.4166293, -4.4203515,
       -4.402825 , -4.4240513, -4.406167 , -4.3970575, -4.415121 ,
       -4.400227 , -4.4143405, -4.4084506, -4.384723 , -4.4136

In [33]:
sampled_indices = tf.math.argmax(example_batch_predictions[0], axis=1)
print(sampled_indices.numpy())

[71 75 12 59 57 57  3  3 80 80 80 59 12 72 58 59  3 64 30 73 80 59 80 80
 80 51 80 80 57 57 64 71 42 59  7 79 26 53 53 51 64 72 58 79 26  5 46 80
 30 42 69 58  9 58 72 58 10 58 69  2 26 50 46 20 66 42 50 59 59 69 26  3
 34 80 80 80  3  3 53 14 59 71 59 59  3 22 37 14 12  2 26 50 35 20 51 80
 80 80 26 42]


In [34]:
print("Input:\n", convert_tensor_to_text(input_example_batch[0], vocab))
print()
print("Next Char Predictions:\n", convert_tensor_to_text(sampled_indices, vocab))

Input:
 b"\nMasks for faces and for noses;\nBugle bracelet, necklace amber,\nPerfume for a lady's chamber;\nGolden"

Next Char Predictions:
 b'qu-ecc\n\nzzze-rde\njDszezzzYzzccjqPe&y?[[Yjrdy?!TzDPod(drd)do\t?XT6lPXeeo?\nHzzz\n\n[0eqee\n8K0-\t?XI6Yzzz?P'


In [35]:
def compile_model(model):
    loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
    opt = tf.keras.optimizers.Adam(learning_rate=0.00125)
    model.compile(optimizer=opt, loss=loss)
    return model

In [36]:
gpus = tf.config.list_physical_devices('GPU')
print(gpus)


[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [37]:
Epochs = 30
model = compile_model(model)
history = model.fit(dataset, epochs = Epochs)

Epoch 1/30


[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 39ms/step - loss: 2.2383
Epoch 2/30
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 38ms/step - loss: 1.4186
Epoch 3/30
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 37ms/step - loss: 1.3195
Epoch 4/30
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 35ms/step - loss: 1.2717
Epoch 5/30
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 35ms/step - loss: 1.2393
Epoch 6/30
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 35ms/step - loss: 1.2142
Epoch 7/30
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 35ms/step - loss: 1.1942
Epoch 8/30
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 35ms/step - loss: 1.1775
Epoch 9/30
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 35ms/step - loss: 1.1641
Epoch 10/30
[1m790/790[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 36ms/

In [38]:
model.save_weights("saved.weights.h5")

In [39]:
model.load_weights("saved.weights.h5")

In [40]:
model.summary()

In [41]:
for input_example_batch, target_example_batch in dataset.take(1):
    print("Input: ", input_example_batch[0].numpy()) # Lets use only the first sequence on the batch
    example_batch_predictions = model(tf.constant([input_example_batch[0].numpy()]))
    print("\n",example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

Input:  [75 72  4 77 63 66 66  4 56 59  4 58 69 68 59 24  4 74 62 63 73  4 67 75
 73 74  4 67 79  4 57 69 67 60 69 72 74  4 56 59 11  3 45 75 68  4 74 62
 55 74  4 77 55 72 67 73  4 79 69 75  4 62 59 72 59  4 73 62 55 66 66  4
 73 62 63 68 59  4 69 68  4 67 59 25  3 27 68 58  4 74 62 69 73 59  4 62
 63 73  4 61]

 (1, 100, 82) # (batch_size, sequence_length, vocab_size)


2024-07-16 19:10:30.299690: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [42]:
sampled_indices = tf.math.argmax(example_batch_predictions[0], axis=1)
print(sampled_indices.numpy())

[72  4 60 63 74 66  4 56 59  4 73 59 68 59  4  3 35 62 59 73  4 63 69 73
 63  4 35 55  4 62 69 75 70 69 72 74  4 63 59  4  3 46 69 57  4 74 62 59
 74  4 35 59 73 73  4  4 62 69 75 72 55 55 72 59  4 55 69 59 66 66  4 73
 59 55 68 59  4 69 60  4 74 59  4  3 27 68 58  4 74 62 59 75 59  4 74 59
 73  4 70 72]


In [43]:
print("Input:\n", convert_tensor_to_text(input_example_batch[0], vocab))
print()
print("Next Char Predictions:\n", convert_tensor_to_text(sampled_indices, vocab))

Input:
 b'ur will be done: this must my comfort be,\nSun that warms you here shall shine on me;\nAnd those his g'

Next Char Predictions:
 b'r fitl be sene \nIhes iosi Ia houport ie \nToc thet Iess  houraare aoell seane of te \nAnd theue tes pr'


In [44]:

def generate_text(model, vocab, starter_word, num_chars_to_generate=100, temperature=0.001):
    # Initialize the text with the starter word
    generated_text = starter_word
    input_eval = convert_text_to_tensor(starter_word, vocab)
    input_eval = tf.expand_dims(input_eval, 0)

    # Generate characters
    for _ in range(num_chars_to_generate):
        predictions = model(input_eval)
        predictions = predictions[:, -1, :] / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()
        
        # Convert the predicted ID to a character and add to the generated text
        predicted_char = vocab[predicted_id]
        generated_text += predicted_char

        # Update the input for the next prediction
        input_eval = tf.expand_dims([predicted_id], 0)

    return generated_text






In [45]:
starter_word = "to be"
generated_text = generate_text(model, vocab, starter_word, num_chars_to_generate=100, temperature=0.4)
print(generated_text)

to be s t and are t ar the and wo wid and s s in hour ange an anghe s an s the histhe the g me ind the wi
