# RNN / GRU Language Model

This notebook implements a **character-level language model** using **RNNs and GRUs** in TensorFlow. 

**Goals:**
- Learn sequence modeling with GRU
- Train a character-level language model
- Generate text using temperature-controlled sampling
- Evaluate log perplexity of the model


Load Dataset and Prepare Vocabulary


In [1]:
import tensorflow as tf
import numpy as np
import os

with open('data/input_text.txt', 'r',encoding="utf-8") as f:
    text = f.read()

vocab = sorted(list(set(text)))
vocab_size = len(vocab)


char2idx = {u:i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)

print(f"Unique characters: {vocab_size}")


Unique characters: 95


Helper Functions


In [2]:
def line_to_tensor(line, vocab):
    chars = list(line)
    char2idx = {ch:i for i,ch in enumerate(vocab)}
    ids = [char2idx[c] for c in chars]
    return tf.convert_to_tensor(ids, dtype=tf.int64)

def split_input_target(sequence):
    return sequence[:-1], sequence[1:]

def create_batch_dataset(lines, vocab, seq_length=100, batch_size=64):
    text = "\n".join(lines)
    all_ids = line_to_tensor(text, vocab)
    dataset = tf.data.Dataset.from_tensor_slices(all_ids)
    sequences = dataset.batch(seq_length+1, drop_remainder=True)
    dataset_xy = sequences.map(lambda seq: split_input_target(seq))
    dataset_xy = dataset_xy.shuffle(10000).batch(batch_size, drop_remainder=True)
    return dataset_xy

def temperature_random_sampling(logits, temperature=1.0):
    logits = logits / temperature
    probs = tf.nn.softmax(logits, axis=-1)
    return tf.random.categorical(tf.math.log(probs), num_samples=1)[-1,0]

def text_from_ids(ids, vocab):
    return ''.join([vocab[i] for i in ids.numpy().flatten()])


GRULM Model

In [3]:
class GRULM(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim=256, rnn_units=128):
        super().__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(rnn_units, return_sequences=True, return_state=True)
        self.dense = tf.keras.layers.Dense(vocab_size, activation='log_softmax')

    def call(self, inputs, states=None, return_state=False, training=False):
        x = self.embedding(inputs, training=training)
        if states is None:
            states = self.gru.get_initial_state(x)
        x, states = self.gru(x, initial_state=states, training=training)
        x = self.dense(x, training=training)
        if return_state:
            return x, states
        else:
            return x

def compile_model(model, learning_rate=0.00125):
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss=loss)
    return model


Prepare Dataset

In [None]:
lines = text.split('\n')
seq_length = 100
batch_size = 64

dataset = create_batch_dataset(lines, vocab, seq_length=seq_length, batch_size=batch_size)


Train the Model

In [5]:
embedding_dim = 256
rnn_units = 128

model = GRULM(vocab_size, embedding_dim, rnn_units)
model = compile_model(model)

history = model.fit(dataset, epochs=10)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Evaluate Log Perplexity

In [6]:
def log_perplexity(preds, target, padding_id=1):
    # Mask padding (if any)
    mask = tf.cast(tf.not_equal(target, padding_id), tf.float32)
    log_probs = tf.reduce_sum(tf.one_hot(target, preds.shape[-1]) * tf.math.log(tf.nn.softmax(preds, axis=-1)), axis=-1)
    log_probs = log_probs * mask
    return -tf.reduce_mean(log_probs)

# Example evaluation
for input_batch, target_batch in dataset.take(1):
    preds, _ = model(input_batch, return_state=True)
    lp = log_perplexity(preds, target_batch)
    print("Log Perplexity:", lp.numpy())


Log Perplexity: 1.4424704


Text Generation

In [10]:
class GenerativeModel:
    def __init__(self, model, vocab, temperature=1.0):
        self.model = model
        self.vocab = vocab
        self.temperature = temperature

    def generate_one_step(self, inputs, states=None):
        input_ids = line_to_tensor(inputs, self.vocab)
        input_ids = tf.expand_dims(input_ids, 0)[:, -1:]  # last token
        logits, states = self.model(input_ids, states=states, return_state=True, training=False)
        logits = logits[:, -1, :]
        next_id = temperature_random_sampling(logits, self.temperature)
        next_char = text_from_ids(tf.expand_dims(next_id,0), self.vocab)
        return next_char, states

    def generate_n_chars(self, num_chars, prefix):
        states = None
        next_char = prefix
        result = [prefix]
        for _ in range(num_chars):
            next_char, states = self.generate_one_step(next_char, states)
            result.append(next_char)
        return "".join(result)

gen_model = GenerativeModel(model, vocab, temperature=0.5)
print(gen_model.generate_n_chars(200, prefix="I have a great"))


I have a great things see him that I should this thou must be love me to me, sir, the world in all the company.

HAMLET.
I’ll see his common, let me so make the money they have all the son age and the shall be more


Save the Model

In [11]:
# Directory to save the model
save_dir = "saved_model/grulm_model"
import os
os.makedirs(save_dir, exist_ok=True)

# Save the model in TensorFlow SavedModel format
model.save(save_dir)

print(f"Model successfully saved at: {save_dir}")




INFO:tensorflow:Assets written to: saved_model/grulm_model\assets


INFO:tensorflow:Assets written to: saved_model/grulm_model\assets


Model successfully saved at: saved_model/grulm_model
