In [1]:
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import layers
import numpy as np
import secrets
import time

In [7]:
# Mount Google Drive; `drive_path` is the base directory used later for saved models.
from google.colab import drive

drive_mount_point = "/content/gdrive"
drive.mount(drive_mount_point)
# Built from the absolute mount point so the path resolves regardless of the
# notebook's current working directory.
drive_path = f"{drive_mount_point}/MyDrive/MachineLearning/HandsOnMachineLearning/chapter16"

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


# 8. Embedded Reber grammar: classify strings as valid or invalid

In [3]:
class Node:
    """One letter of a grammar graph plus the nodes that may come after it."""

    def __init__(self, letter, next_nodes=None):
        """Remember the emitted letter and (optionally) the successor nodes.

        Args:
            letter: The symbol this node stands for.
            next_nodes: Collection of successor nodes, or ``None`` (the
                default) when the node has no successors yet.
        """
        self.letter = letter
        self.set_next_nodes(next_nodes)

    def set_next_nodes(self, next_nodes):
        """Replace the stored successor collection with ``next_nodes``."""
        self.next_nodes = next_nodes

    def pick_next_node(self):
        """Return one successor, drawn with ``secrets.choice``."""
        successors = self.next_nodes
        return secrets.choice(successors)


class ReberString:
    """Transition graph of a single Reber grammar, built from ``Node`` objects.

    Each node carries the letter it emits and the nodes that may follow it.
    ``start`` is the opening "B" node and ``end`` is the closing "E" node.
    """

    def __init__(self, postlayers=None):
        """Create all nodes, wire the transitions, and attach ``postlayers``.

        Args:
            postlayers: Successors of this grammar's closing "E" node. May be
                a list of ``Node`` objects, a single ``Node`` (it is wrapped
                in a one-element list), or ``None`` to leave "E" terminal.
        """
        # Node.pick_next_node() chooses from a sequence, so a bare Node must
        # be wrapped before it is stored as the successor collection.
        if isinstance(postlayers, Node):
            postlayers = [postlayers]

        self.end = Node("E")
        layer3_V = Node("V")
        layer3_S = Node("S")
        layer3_X = Node("X")
        layer3_P = Node("P")
        layer2_V = Node("V")
        layer2_T = Node("T")
        layer2_X = Node("X")
        layer2_S = Node("S")
        layer1_P = Node("P")
        layer1_T = Node("T")
        self.start = Node("B")

        self.end.set_next_nodes(postlayers)
        # Both exits lead to THIS instance's "E" node (not a module-level
        # one), so a walk emits "E" and then continues into ``postlayers``.
        layer3_V.set_next_nodes([self.end])
        layer3_S.set_next_nodes([self.end])
        layer3_X.set_next_nodes([layer2_T, layer2_V])
        layer3_P.set_next_nodes([layer3_X, layer3_S])

        # layer2_T and layer2_S list themselves as successors: these are the
        # two self-loops of the grammar.
        layer2_V.set_next_nodes([layer3_P, layer3_V])
        layer2_T.set_next_nodes([layer2_T, layer2_V])
        layer2_X.set_next_nodes([layer3_X, layer3_S])
        layer2_S.set_next_nodes([layer2_S, layer2_X])

        layer1_P.set_next_nodes([layer2_T, layer2_V])
        layer1_T.set_next_nodes([layer2_S, layer2_X])
        self.start.set_next_nodes([layer1_P, layer1_T])


# Embedded Reber grammar: "B", then "T" or "P", then a complete Reber string,
# then a "T"/"P" node tied to the chosen branch, then "E". Each branch gets
# its own ReberString graph so the trailing letter matches the leading one.
end = Node("E")
postlayer_P = Node("P", [end])
postlayer_T = Node("T", [end])
# Successors are drawn with secrets.choice, which needs a sequence, so the
# single follow-up node is passed as a one-element list.
string_P = ReberString([postlayer_P])
string_T = ReberString([postlayer_T])
prelayer_P = Node("P", [string_P.start])
prelayer_T = Node("T", [string_T.start])
start = Node("B", [prelayer_T, prelayer_P])

# Alphabet of the grammar; a letter's position in this list is its integer id.
unique_letters = ["B", "E", "P", "S", "T", "V", "X"]

In [4]:
def generate_valid_sequence(start, join=True):
    """Walk the graph from ``start`` and collect the letter of every node visited.

    The walk follows ``pick_next_node()`` until it reaches a node without
    successors (``next_nodes`` is ``None`` or empty). The letter of that
    terminal node is included in the result.

    Args:
        start: Node to begin from; must expose ``letter``, ``next_nodes`` and
            ``pick_next_node()``.
        join: If true (default) return the letters as one string, otherwise
            return them as a list.

    Returns:
        The visited letters, as a ``str`` or a ``list`` depending on ``join``.
    """
    cur_node = start
    letters = [cur_node.letter]

    # Append after each move so the terminal node's letter is not dropped.
    while cur_node.next_nodes:
        cur_node = cur_node.pick_next_node()
        letters.append(cur_node.letter)

    if join:
        return "".join(letters)
    return letters


def one_hot_encoding(string, unique_letters):
    """Map every symbol of ``string`` to its position in ``unique_letters``.

    Despite the name, the result is a list of integer ids (one per symbol),
    not one-hot vectors. The lookup uses ``unique_letters.index``, so a symbol
    that is absent propagates whatever that call raises.

    Args:
        string: Iterable of symbols to encode.
        unique_letters: Ordered alphabet used for the lookup.

    Returns:
        A list with one integer index per symbol of ``string``.
    """
    letter_ids = []
    for symbol in string:
        letter_ids.append(unique_letters.index(symbol))
    return letter_ids


def create_valid_data(length, start, unique_letters):
    """Generate ``length // 2`` grammar strings, each encoded as letter indices.

    Args:
        length: Size of the overall dataset; half of it (integer division)
            is produced here.
        start: Node passed to ``generate_valid_sequence`` as the walk's origin.
        unique_letters: Alphabet handed to ``one_hot_encoding``.

    Returns:
        A list of encoded strings (each a list of integer indices).
    """
    n_samples = length // 2
    return [
        one_hot_encoding(generate_valid_sequence(start), unique_letters)
        for _ in range(n_samples)
    ]


def create_invalid_data(length, start, unique_letters, n_invalid_letters=1):
    """Generate ``length // 2`` corrupted strings, each encoded as letter indices.

    Every sample starts as a generated sequence; ``n_invalid_letters`` distinct
    positions are then overwritten with a different letter from
    ``unique_letters``. The corrupted string is not re-checked against the
    grammar.

    Args:
        length: Size of the overall dataset; half of it (integer division)
            is produced here.
        start: Node passed to ``generate_valid_sequence`` as the walk's origin.
        unique_letters: Alphabet used for replacements and for encoding.
        n_invalid_letters: Number of positions to overwrite in each string.

    Returns:
        A list of encoded strings (each a list of integer indices).
    """
    rng = secrets.SystemRandom()
    corrupted = []
    for _ in range(length // 2):
        symbols = generate_valid_sequence(start, False)
        positions = rng.sample(range(len(symbols)), n_invalid_letters)
        for pos in positions:
            # Candidates exclude the current letter so the position really changes.
            alternatives = [letter for letter in unique_letters
                            if letter != symbols[pos]]
            symbols[pos] = secrets.choice(alternatives)
        corrupted.append(one_hot_encoding("".join(symbols), unique_letters))
    return corrupted



def create_dataset(length, start, unique_letters, training=False):
    """Build features and labels from generated valid and corrupted strings.

    Valid samples come first and are labelled 1.0; corrupted samples (one
    replaced letter each) follow and are labelled 0.0. No shuffling is done.

    Args:
        length: Requested dataset size; each half holds ``length // 2`` samples.
        start: Start node of the grammar graph.
        unique_letters: Alphabet used to encode letters as integer indices.
        training: Accepted but not used by this function.

    Returns:
        ``(X, y)`` where ``X`` is a ragged tensor of encoded strings and ``y``
        is a NumPy array holding one single-element label row per sample.
    """
    positives = create_valid_data(length, start, unique_letters)
    negatives = create_invalid_data(length, start, unique_letters, 1)
    X = tf.ragged.constant(positives + negatives, ragged_rank=1)
    y = np.array([[1.]] * len(positives) + [[0.]] * len(negatives))
    return X, y


# Three independently generated splits; each is half valid, half corrupted strings.
X_train, y_train = create_dataset(7500, start, unique_letters, training=True)
X_valid, y_valid = create_dataset(1500, start, unique_letters, training=False)
X_test, y_test = create_dataset(1000, start, unique_letters, training=False)

In [6]:
# Only the timestamp goes through strftime; formatting the whole path would
# treat any "%" inside `drive_path` as a format directive.
timestamp = time.strftime("%Y_%m_%d-%H_%M_%S")
filepath = f"{drive_path}/models/reberstring_{timestamp}"
# Save the full model after an epoch only when `val_loss` has improved.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    filepath,
    monitor='val_loss',
    verbose=0,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    save_freq='epoch',
    initial_value_threshold=None
)
embedding_size = 5

# Ragged int32 input (variable-length lists of letter ids) -> embedding ->
# GRU -> one sigmoid unit for the binary valid/invalid label.
model = keras.models.Sequential([
    layers.InputLayer(input_shape=[None], dtype=tf.int32, ragged=True),
    layers.Embedding(input_dim=len(unique_letters), output_dim=embedding_size),
    layers.GRU(30),
    layers.Dense(1, activation="sigmoid")
])
optimizer = keras.optimizers.SGD(learning_rate=0.02, momentum=0.95, nesterov=True)
model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])
model.fit(X_train, y_train, epochs=20, validation_data=(X_valid, y_valid),
          callbacks=[checkpoint_cb])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x78700f0a76a0>

# 9.