In [None]:
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import random
import io

In [None]:
# Load the training corpus as one lower-cased string.
with io.open("cwe-train.txt", encoding="utf-8") as f:
    # Newlines become spaces so that printed samples stay on a single line.
    text = " ".join(f.read().lower().split("\n"))
print("Corpus length:", len(text))

Corpus length: 603432


In [None]:
# Vocabulary: every distinct character of the corpus, in sorted order,
# with lookup tables in both directions.
chars = sorted(set(text))
print("Total chars:", len(chars))
char_indices = {ch: idx for idx, ch in enumerate(chars)}
indices_char = {idx: ch for idx, ch in enumerate(chars)}

# Slide a window of maxlen characters over the text, advancing by step each
# time; the character immediately after each window is the prediction target.
maxlen = 40
step = 3
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start : start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print("Number of sequences:", len(sentences))

# One-hot encode the windows as (sequence, position, char) and the targets
# as (sequence, char).
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool_)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool_)
positions = np.arange(maxlen)
for row, (window, target) in enumerate(zip(sentences, next_chars)):
    # Every window holds exactly maxlen characters, so positions and the
    # per-character index list pair up one to one.
    x[row, positions, [char_indices[ch] for ch in window]] = True
    y[row, char_indices[target]] = True

Total chars: 31
Number of sequences: 201131


In [None]:
# Single LSTM layer over one-hot character windows, followed by a softmax
# over the vocabulary that scores the next character.
model = keras.Sequential()
model.add(keras.Input(shape=(maxlen, len(chars))))
model.add(layers.LSTM(128))
model.add(layers.Dense(len(chars), activation="softmax"))

optimizer = keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(optimizer=optimizer, loss="categorical_crossentropy")

In [None]:
def sample(preds, temperature=1.0):
    """Sample one index from a probability array after temperature scaling.

    The probabilities are re-weighted as ``p ** (1 / temperature)`` and
    renormalised, then a single index is drawn from the resulting
    distribution. Temperatures below 1 sharpen the distribution, temperatures
    above 1 flatten it.

    Args:
        preds: 1-D array-like of probabilities. Entries that are exactly 0
            keep probability 0 after scaling.
        temperature: Non-zero scaling factor applied to the log-probabilities.

    Returns:
        The sampled index, as returned by ``np.argmax``.

    Raises:
        ValueError: If ``temperature`` is 0.
    """
    if temperature == 0:
        raise ValueError("temperature must be non-zero")

    preds = np.asarray(preds).astype("float64")
    # log(0) == -inf is the intended value for impossible characters, so the
    # divide-by-zero RuntimeWarning NumPy would emit here is silenced.
    with np.errstate(divide="ignore"):
        logits = np.log(preds) / temperature
    # Shift so the largest logit is 0. The softmax result is unchanged, but
    # exp() can no longer underflow every entry to 0 (which produced 0/0 = NaN
    # at small temperatures).
    logits -= np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    # One multinomial draw yields a one-hot row; argmax recovers its index.
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [None]:
epochs = 50
batch_size = 256

for epoch in range(epochs):
    # Train one epoch at a time so a text sample can be printed after each.
    model.fit(x, y, batch_size=batch_size, epochs=1)
    print()
    print("Generating text after epoch: %d" % epoch)

    # The same randomly chosen seed window is reused for every diversity.
    start_index = random.randint(0, len(text) - maxlen - 1)
    seed_end = start_index + maxlen
    for diversity in (0.2, 0.5, 1.0, 1.2):
        print("...Diversity:", diversity)

        sentence = text[start_index:seed_end]
        print('...Generating with seed: "' + sentence + '"')

        pieces = []
        for _ in range(400):
            # One-hot encode the current window as a batch of one.
            x_pred = np.zeros((1, maxlen, len(chars)))
            x_pred[0, np.arange(len(sentence)), [char_indices[ch] for ch in sentence]] = 1.0
            preds = model.predict(x_pred, verbose=0)[0]
            next_char = indices_char[sample(preds, diversity)]
            # Drop the oldest character and append the new one, keeping the
            # window at maxlen characters.
            sentence = sentence[1:] + next_char
            pieces.append(next_char)
        generated = "".join(pieces)

        print("...Generated: ", generated)
        print()


Generating text after epoch: 0
...Diversity: 0.2
...Generating with seed: "dahe kuvudaza mbuli za chibozi hawamanyi"
...Generated:  le mulungu kawalongela munhu yelile kulonga kulawa kulonga kulonga kulonga kulonga kulonga kulonga kulonga kumwenu kwa ugungu na mulungu kawalongela mulungu kamulongela munhu yelile kulawa kulonga kulonga kulonga kwa mulungu kamulungulila muna dibululu da ichimu cha chilisito kwa ugungu wa mulungu kwa ugungu na wanhu wa mulungu kezawa wanhu wa mulungu keza mulungu kawalongela munhu ya mulungu kamu

...Diversity: 0.5
...Generating with seed: "dahe kuvudaza mbuli za chibozi hawamanyi"
...Generated:   wa mulungu na mulungu kamuwong'ha uganga wake mulungu, niye niye. mbali nolonga na mulungu kezakuwa ng'hulu inogile ya mulungu kawakuwa mulungu kulawa kulonga muna iyoya kulungulizi wa mulungu na kulonga kulonda kulawilila ng'hulu iyo, kulawa kuwa mulungu na mulungu na waja wahitwa yesu kulonga kutungula kulongela uko muna imanyali ya mulungu kwa udaho zelile m

  after removing the cwd from sys.path.


...Generated:  anhu wa mulungu, kawalongela, "niye niye na kuwalagusa wanhu wa mulungu wa mulungu wa mulungu, kwaviya mulungu kamulongela malagilizo ya mulungu yoyamwing'hile musa ya mulungu. maabaho maabaho wanhu wa mulungu yoyamwing'hile musa ya mulungu, na kuwalawilila wanhu wa kuulanga kakala yofundiza kulawa kuulanga. maabaho yesu kawalongela wanhu wa mulungu na kuwalagusa wanhu wa mulungu wa chilisito kwa 

...Diversity: 0.5
...Generating with seed: "ma usenga uko efeso wakawatange walala w"
...Generated:  anhu wengi wengiligwa na walondwa, muhala hachimanyile vinogile kuwa maliya kamulamba na kumulongela yesu, "mwizagalila malagilizo yoyodaha kumulamba na kumwing'ha hewela kuwa na wanhu wa mulungu weli siyo wayahudi wawo wapate vili muna yamalama ya mulungu. wanhu wano wowoniwagusa waja wohitalile kuwa usenga wa mulungu, walondwa. yondayamwing'hile musa yoyodahile kulawa kuulanga wa chilisito yesu,

...Diversity: 1.0
...Generating with seed: "ma usenga uko efeso wakawatange wala