<a href="https://colab.research.google.com/github/Atharv-16/char-level-text-generation-using-LSTM/blob/main/char_level_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Importing Libraries

In [4]:
import keras
from keras import layers

import numpy as np
import random
import io

### Load and Pre-process text data

In [11]:
# Fetch the Nietzsche corpus via keras.utils.get_file, which returns the
# local path of the file.
path = keras.utils.get_file(
    "nietzsche.txt",
    origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt",
)

# Read the whole file, lowercase it, and turn newlines into spaces so that
# printed samples display as a single flowing line.
with io.open(path, encoding="utf-8") as f:
    text = f.read().lower().replace("\n", " ")

print("Corpus length:", len(text))

# Vocabulary: every distinct character in the corpus, in sorted order so the
# index assigned to each character is deterministic.
chars = sorted(set(text))
print("Total chars:", len(chars))

# Forward (char -> index) and reverse (index -> char) lookup tables.
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = dict(enumerate(chars))

# Slice the corpus into overlapping windows of `maxlen` characters, taking a
# new window every `step` characters. Each window is paired with the single
# character that immediately follows it (the prediction target).
maxlen = 40
step = 3
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start : start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print("Number of sequences:", len(sentences))

# One-hot encode the data:
#   x[n, t, k] is True when character t of window n has vocabulary index k
#   y[n, k]    is True when the character following window n has index k
n_sequences, vocab_size = len(sentences), len(chars)
x = np.zeros((n_sequences, maxlen, vocab_size), dtype="bool")
y = np.zeros((n_sequences, vocab_size), dtype="bool")
for row, (window, target) in enumerate(zip(sentences, next_chars)):
    # Set every time step of this window with a single indexed assignment.
    x[row, np.arange(len(window)), [char_indices[c] for c in window]] = 1
    y[row, char_indices[target]] = 1

Corpus length: 600893
Total chars: 56
Number of sequences: 200285


### Build the Model

In [18]:
# Two stacked LSTM layers followed by a softmax over the vocabulary.
# The first LSTM uses return_sequences=True so that the second LSTM receives
# its output at every time step rather than only the last one.
stacked_layers = [
    keras.Input(shape=(maxlen, len(chars))),
    layers.LSTM(128, return_sequences=True),
    layers.LSTM(128),
    layers.Dense(len(chars), activation="softmax"),
]
model = keras.Sequential(stacked_layers)

# The targets in y are one-hot rows, hence categorical cross-entropy.
optimizer = keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(optimizer=optimizer, loss="categorical_crossentropy")

### Sample and generate text

In [19]:
def sample(preds, temperature=1.0):
    """Sample an index from a probability array after temperature scaling.

    The probabilities are converted to log space, divided by ``temperature``
    and re-normalised with a softmax; one index is then drawn from the
    resulting distribution with ``np.random.multinomial``.

    Args:
        preds: 1-D array-like of probabilities (e.g. a softmax output row).
        temperature: Scaling factor applied to the log-probabilities. Values
            below 1 sharpen the distribution, values above 1 flatten it.
            ``0`` selects the most probable index deterministically.

    Returns:
        The sampled index, as returned by ``np.argmax``.
    """
    preds = np.asarray(preds).astype("float64")

    if temperature == 0:
        # Dividing by zero is undefined; the limit of temperature -> 0 is
        # greedy decoding, so return the most probable index directly.
        return np.argmax(preds)

    # log(0) == -inf is the correct logit for an impossible class, so the
    # divide-by-zero warning NumPy would emit here is noise.
    with np.errstate(divide="ignore"):
        logits = np.log(preds) / temperature

    # Softmax is invariant to shifting the logits. Subtracting the maximum
    # keeps the largest exponent at 0 so that small temperatures cannot
    # underflow every term to 0 (which would make the normalisation 0/0).
    exp_preds = np.exp(logits - np.max(logits))
    preds = exp_preds / np.sum(exp_preds)

    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

### Train and test the model

In [23]:
epochs = 30
batch_size = 128

# Every generation run starts from the same seed: the first `maxlen`
# characters of the corpus. Using a fixed seed makes the samples printed
# after each epoch directly comparable with one another.
seed_text = text[0:maxlen]

for epoch in range(epochs):
    # Train for exactly one pass over the data, then print samples so that
    # progress can be inspected after every epoch.
    model.fit(x, y, batch_size=batch_size, epochs=1, verbose=1)
    print()
    print("Generating text after epoch: %d" % epoch)

    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print("...Temperature:", temperature)

        sentence = seed_text
        print('...Generating with seed: "' + sentence + '"')

        generated_chars = []
        for _ in range(400):
            # One-hot encode the current window of `maxlen` characters.
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.0

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = indices_char[next_index]

            # Slide the window: drop the oldest character, append the new
            # one, so the window length stays at `maxlen`.
            sentence = sentence[1:] + next_char
            generated_chars.append(next_char)
        generated = "".join(generated_chars)

        print("...Generated: ", generated)
        print("-")


Generating text after epoch: 0
...Temperature: 0.2
...Generating with seed: "preface   supposing that truth is a woma"
...Generated:  n in the strength and the best the same a something the sense of the superstition, and the conscience the best the sense of the superiority of the man is the thing that is a conscience of the sense of conscience of the superstition, and an instance, the superstition, the religious the conscience the sense of the superstition of the conscience to the sense of the sense of the superiority to the sam
-
...Temperature: 0.5
...Generating with seed: "preface   supposing that truth is a woma"
...Generated:  n and manifest such as a christian little, so thought to the really who have been to the errorated these with a manifest and thereby, the religious more that is all any they indeed to the sense and a stare of the probable the subtless: and men is any the same toven to be churct consciented and the morality is the missonces itself belief to the thing of the 