<a href="https://colab.research.google.com/github/Redcoder815/Deep_Learning_TensorFlow/blob/main/RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import tensorflow as tf
import numpy as np

class MyRNN(tf.keras.Model):
    """Minimal tanh recurrent network with a linear readout at every step.

    For each time step ``t`` the model computes::

        h_t = tanh(x_t @ Wxh + h_{t-1} @ Whh + bh)
        y_t = h_t @ Why + by

    Args:
        input_size: Size of the last axis of each input step ``x_t``.
        hidden_size: Size of the recurrent state ``h``.
        output_size: Size of the last axis of each output step ``y_t``.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size

        # Recurrent cell parameters: input->hidden, hidden->hidden, bias.
        self.Wxh = self.add_weight(
            shape=(input_size, hidden_size),
            initializer=tf.random_normal_initializer(stddev=0.1),
            trainable=True
        )
        self.Whh = self.add_weight(
            shape=(hidden_size, hidden_size),
            initializer=tf.random_normal_initializer(stddev=0.1),
            trainable=True
        )
        self.bh = self.add_weight(
            shape=(hidden_size,),
            initializer=tf.zeros_initializer(),
            trainable=True
        )

        # Readout parameters: hidden->output projection and bias.
        self.Why = self.add_weight(
            shape=(hidden_size, output_size),
            initializer=tf.random_normal_initializer(stddev=0.1),
            trainable=True
        )
        self.by = self.add_weight(
            shape=(output_size,),
            initializer=tf.zeros_initializer(),
            trainable=True
        )

    def call(self, x, h0=None):
        """Run the RNN over a whole sequence.

        Args:
            x: Input of shape (seq_len, batch, input_size), time-major.
            h0: Optional initial hidden state of shape (batch, hidden_size).
                When ``None`` the state starts at zeros.

        Returns:
            A tuple ``(outputs, h)`` where ``outputs`` stacks the per-step
            readouts into shape (seq_len, batch, output_size) and ``h`` is
            the hidden state after the last step, shape (batch, hidden_size).
        """
        x = tf.convert_to_tensor(x)

        # Prefer the statically known sequence length: it is a plain Python
        # int, so the time loop below stays an ordinary Python loop even when
        # the model is traced into a graph. Only fall back to the dynamic
        # length when the static one is unknown.
        seq_len = x.shape[0]
        if seq_len is None:
            seq_len = tf.shape(x)[0]
        batch = tf.shape(x)[1]

        if h0 is None:
            h = tf.zeros((batch, self.hidden_size))
        else:
            h = h0

        outputs = []

        for t in range(seq_len):
            xt = x[t]  # (batch, input_size)

            # State update: mix the current input with the previous state.
            h = tf.tanh(
                tf.matmul(xt, self.Wxh) +
                tf.matmul(h, self.Whh) +
                self.bh
            )

            # Linear readout of the new state (raw scores, no activation).
            y = tf.matmul(h, self.Why) + self.by
            outputs.append(y)

        return tf.stack(outputs), h


# -----------------------------
# Data prep
# -----------------------------
# Training corpus and its character-level vocabulary.
text = "hello world"
chars = sorted(set(text))                    # unique characters, sorted
stoi = dict(zip(chars, range(len(chars))))   # character -> integer index
itos = dict(enumerate(chars))                # integer index -> character

def encode(s):
    """Map each character of ``s`` to its vocabulary index.

    Returns a 1-D ``np.int32`` array with one entry per character. A
    character missing from ``stoi`` raises ``KeyError``.
    """
    indices = [stoi[ch] for ch in s]
    return np.asarray(indices, dtype=np.int32)

def one_hot(idx, vocab_size):
    """One-hot encode the indices ``idx`` over ``vocab_size`` classes.

    Thin wrapper around ``tf.one_hot``; the result gains a trailing axis of
    length ``vocab_size``.
    """
    encoded = tf.one_hot(indices=idx, depth=vocab_size)
    return encoded

# Model dimensions: one input/output unit per vocabulary character.
vocab_size = len(chars)
hidden_size = 32

# The network reads one-hot characters and emits one logit per character.
model = MyRNN(
    input_size=vocab_size,
    hidden_size=hidden_size,
    output_size=vocab_size,
)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
# Targets are integer class ids and the model outputs raw logits.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Whole corpus as integer indices.
data = encode(text)

# -----------------------------
# Training loop
# -----------------------------
# The inputs and targets are identical for every epoch, so build them once
# up front instead of re-encoding the corpus inside each recorded step.
x = one_hot(data[:-1], vocab_size)  # (seq, vocab)
x = tf.expand_dims(x, 1)            # (seq, batch=1, vocab)
y_true = data[1:]                   # next char for each input position

for epoch in range(500):
    # Only the forward pass and the loss need to be recorded for backprop.
    with tf.GradientTape() as tape:
        y_pred, _ = model(x)
        y_pred = tf.squeeze(y_pred, axis=1)  # (seq, vocab)

        loss = loss_fn(y_true, y_pred)

    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

    # Report progress every 50 epochs.
    if epoch % 50 == 0:
        print(f"epoch {epoch}, loss={loss.numpy():.4f}")


# -----------------------------
# Generation
# -----------------------------
def generate(model, start="h", length=20):
    """Sample text from ``model``, continuing the prompt ``start``.

    The prompt is fed through the model first to obtain a hidden state; each
    following step feeds back the character that was just sampled.

    Args:
        model: Callable as ``model(x, h)`` with ``x`` shaped
            (seq, batch=1, vocab) that returns ``(outputs, h)``.
        start: Prompt text. Every character must be a key of ``stoi``; it
            may be longer than one character.
        length: Number of characters to sample after the prompt.

    Returns:
        ``start`` followed by ``length`` sampled characters.

    Raises:
        KeyError: If ``start`` is empty or contains a character that is not
            in ``stoi``.
    """
    if not start:
        # There is no character to feed the model; fail the same way the
        # vocabulary lookup does for an unknown prompt.
        raise KeyError(start)

    # First step consumes the whole prompt; later steps a single character.
    step_indices = [stoi[ch] for ch in start]
    h = None
    pieces = [start]

    for _ in range(length):
        x = one_hot(step_indices, vocab_size)
        x = tf.expand_dims(x, 1)  # (seq, batch=1, vocab)

        y, h = model(x, h)
        logits = y[-1]  # (1, vocab): scores after the last fed character

        # tf.random.categorical takes unnormalised log-probabilities, so the
        # logits are sampled directly; a softmax followed by a log would only
        # shift them by a constant and can produce -inf on underflow.
        next_idx = int(tf.random.categorical(logits, 1).numpy()[0, 0])

        pieces.append(itos[next_idx])
        step_indices = [next_idx]

    return "".join(pieces)

# Sample a continuation from the trained model, seeded with "h".
print(generate(model, start="h"))

epoch 0, loss=2.0967
epoch 50, loss=0.0067
epoch 100, loss=0.0025
epoch 150, loss=0.0016
epoch 200, loss=0.0011
epoch 250, loss=0.0008
epoch 300, loss=0.0006
epoch 350, loss=0.0005
epoch 400, loss=0.0004
epoch 450, loss=0.0003
hello worldrlworlorld
