In [1]:
import numpy as np
import keras
import sys
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import to_categorical

import joblib


In [2]:
filename = "Datasets_for_labs/wonderland.txt"
raw_text = open(filename, encoding='utf-8').read()
raw_text = raw_text.lower()

In [3]:
chars = sorted(list(set(raw_text)))
char_to_int = dict((c, i) for i, c in enumerate(chars))

In [4]:
n_chars = len(raw_text)
n_vocab = len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

Total Characters:  144678
Total Vocab:  51


In [5]:
seq_length = 100
dataX = []
dataY = []
for i in range(0, n_chars - seq_length, 1):
    seq_in = raw_text[i:i + seq_length]
    seq_out = raw_text[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  144578


In [6]:
# reshape X to be [samples, time steps, features]
X = np.reshape(dataX, (n_patterns, seq_length, 1))
# normalize
X = X / float(n_vocab)
# one hot encode the output variable
y = to_categorical(dataY)

# !!! НЕ ТРОГАТЬ

In [14]:
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

  super().__init__(**kwargs)


In [20]:
# define the checkpoint
filepath="weights-improvement-{epoch:02d}-{loss:.4f}.hdf5.keras"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

In [17]:
model.summary()

In [24]:
tensorboard_callback = keras.callbacks.TensorBoard(log_dir="./logs")

In [25]:
model.fit(X, y, epochs=20, batch_size=128, callbacks=[tensorboard_callback])

Epoch 1/20
[1m1130/1130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 119ms/step - loss: 2.7475
Epoch 2/20
[1m1130/1130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 117ms/step - loss: 2.6630
Epoch 3/20
[1m1130/1130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 120ms/step - loss: 2.5963
Epoch 4/20
[1m1130/1130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 117ms/step - loss: 2.5349
Epoch 5/20
[1m1130/1130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 117ms/step - loss: 2.4782
Epoch 6/20
[1m1130/1130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 118ms/step - loss: 2.4334
Epoch 7/20
[1m1130/1130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 118ms/step - loss: 2.3759
Epoch 8/20
[1m1130/1130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m134s[0m 119ms/step - loss: 2.3256
Epoch 9/20
[1m1130/1130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m134s[0m 119ms/step - loss: 2.2872
Epoch 10/20
[1m1130/1130[0m [32m━━

<keras.src.callbacks.history.History at 0x1f89246ea10>

In [29]:
import joblib

joblib.dump(model, 'wonderland_NN.joblib', compress=3)

AttributeError: 'NoneType' object has no attribute 'File'

# !!! ТРОГАТЬ



In [10]:
model_test = Sequential()
model_test.add(LSTM(10, input_shape=(X.shape[1], X.shape[2])))
model_test.add(Dropout(0.2))
model_test.add(Dense(y.shape[1], activation='softmax'))
model_test.compile(loss='categorical_crossentropy', optimizer='adam')

  super().__init__(**kwargs)


In [11]:
model_test.summary()

In [12]:
model_test.fit(X, y, epochs=20, batch_size=128)

Epoch 1/20
[1m1130/1130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 9ms/step - loss: 3.3079
Epoch 2/20
[1m1130/1130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 10ms/step - loss: 3.0824
Epoch 3/20
[1m1130/1130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 9ms/step - loss: 3.0679
Epoch 4/20
[1m1130/1130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 10ms/step - loss: 3.0493
Epoch 5/20
[1m1130/1130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 9ms/step - loss: 3.0332
Epoch 6/20
[1m1130/1130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 9ms/step - loss: 3.0174
Epoch 7/20
[1m1130/1130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 10ms/step - loss: 2.9852
Epoch 8/20
[1m1130/1130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 9ms/step - loss: 2.9503
Epoch 9/20
[1m1130/1130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 10ms/step - loss: 2.9288
Epoch 10/20
[1m1130/1130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

<keras.src.callbacks.history.History at 0x13a51cd1d90>

In [31]:
int_to_char = dict((i, c) for i, c in enumerate(chars))
start = np.random.randint(0, len(dataX)-1)
pattern = dataX[start]
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# generate characters
for i in range(1000):
    x = np.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    index = np.argmax(prediction)
    result = int_to_char[index]
    seq_in = [int_to_char[value] for value in pattern]
    sys.stdout.write(result)
    pattern.append(index)
    pattern = pattern[1:len(pattern)]
    
print("\nDone")

Seed:
" s or not.”

“i’m a poor man, your majesty,” the hatter began, in a trembling voice,
“—and i hadn’t b "
ege tiane io the say.”

“i wash the corstuse of the canee?” said alice, “ho wou don’t keke the toedl of the say.”

“h wash the corstuse ”ou aalit tile,” said alice, “ho wou don’t keke the toedl of the say.”

“h wash the corstuse ”ou aalit tile,” said alice, “ho wou don’t keke the toedl of the say.”

“h wash the corstuse ”ou aalit tile,” said alice, “ho wou don’t keke the toedl of the say.”

“h wash the corstuse ”ou aalit tile,” said alice, “ho wou don’t keke the toedl of the say.”

“h wash the corstuse ”ou aalit tile,” said alice, “ho wou don’t keke the toedl of the say.”

“h wash the corstuse ”ou aalit tile,” said alice, “ho wou don’t keke the toedl of the say.”

“h wash the corstuse ”ou aalit tile,” said alice, “ho wou don’t keke the toedl of the say.”

“h wash the corstuse ”ou aalit tile,” said alice, “ho wou don’t keke the toedl of the say.”

“h wash the corstuse ”ou aalit

In [14]:
import keras
from keras import layers

import numpy as np
import random
import io

In [15]:
path = keras.utils.get_file(
    "nietzsche.txt",
    origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt",
)
with io.open(path, encoding="utf-8") as f:
    text = f.read().lower()
text = text.replace("\n", " ")  # We remove newlines chars for nicer display
print("Corpus length:", len(text))

chars = sorted(list(set(text)))
print("Total chars:", len(chars))
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

# cut the text in semi-redundant sequences of maxlen characters
maxlen = 40
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i : i + maxlen])
    next_chars.append(text[i + maxlen])
print("Number of sequences:", len(sentences))

x = np.zeros((len(sentences), maxlen, len(chars)), dtype="bool")
y = np.zeros((len(sentences), len(chars)), dtype="bool")
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
[1m600901/600901[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1us/step
Corpus length: 600893
Total chars: 56
Number of sequences: 200285


In [16]:
model = keras.Sequential(
    [
        keras.Input(shape=(maxlen, len(chars))),
        layers.LSTM(128),
        layers.Dense(len(chars), activation="softmax"),
    ]
)
optimizer = keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(loss="categorical_crossentropy", optimizer=optimizer)

In [17]:
def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype("float64")
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [21]:
epochs = 10
batch_size = 128

for epoch in range(epochs):
    model.fit(x, y, batch_size=batch_size, epochs=1)
    print()
    print("Generating text after epoch: %d" % epoch)

    start_index = random.randint(0, len(text) - maxlen - 1)
    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print("...Diversity:", diversity)

        generated = ""
        sentence = text[start_index : start_index + maxlen]
        print('...Generating with seed: "' + sentence + '"')

        for i in range(400):
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.0
            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]
            sentence = sentence[1:] + next_char
            generated += next_char

        print("...Generated: ", generated)
        print("-")

[1m1565/1565[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 20ms/step - loss: 1.8086

Generating text after epoch: 0
...Diversity: 0.2
...Generating with seed: " in respect to the exoteric class, stand"
...Generated:  s and superion to a superion of the superion of the superion to the stands the contrance of the superion of the superion of the selfous and superion to the despectation of the superion of the superion of the self to the superion of the superion of the self a strong the self-contration of the superion of the superion of the superion of the contrance of the as a superion of the superion of the self-
-
...Diversity: 0.5
...Generating with seed: " in respect to the exoteric class, stand"
...Generated:  s of the well to a sater and to the taste of the preserfent to superiled of the desprive one to see in the world to the man in the will in the sudenced, was the stands and the custen with he would to all to the one such a father man for the action of the string belong 