**Цель:**

Рекуррентные нейронные сети также могут быть использованы в качестве генеративных 
моделей.
Это означает, что помимо построения прогнозов они могут изучать 
последовательности из предметной области, а затем генерировать совершенно новые 
правдоподобные последовательности для этой области.
Подобные генеративные модели полезны не только для оценки того, насколько хорошо 
модель изучила задачу, но и для того, чтобы узнать больше о самой предметной 
области.

**Задачи:**

- Ознакомиться с генерацией текста
- Ознакомиться с системой Callback в Keras

In [1]:
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
# Load the corpus; the context manager closes the file handle as soon as the
# text has been read.
filename = "wonderland.txt"
with open(filename) as text_file:
    raw_text = text_file.read()
# Lowercase everything so upper/lower variants share one vocabulary entry.
raw_text = raw_text.lower()
# Sorted unique characters give every character a stable integer id.
chars = sorted(set(raw_text))
char_to_int = {c: i for i, c in enumerate(chars)}
n_chars = len(raw_text)
n_vocab = len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)
# Slide a window of seq_length characters over the text: each window is one
# input pattern and the character right after it is the target.
seq_length = 100
dataX = []
dataY = []
for i in range(n_chars - seq_length):
    seq_in = raw_text[i:i + seq_length]
    seq_out = raw_text[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)
# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# normalize the integer ids by the vocabulary size
X = X / float(n_vocab)
# one hot encode the output variable
y = np_utils.to_categorical(dataY)

Total Characters:  163783
Total Vocab:  61
Total Patterns:  163683


In [3]:
# Network: one 256-unit LSTM over the input windows, dropout of 0.2, and a
# softmax layer with one output per column of y.
model = Sequential([
    LSTM(256, input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

# Checkpoint callback: monitors the training loss (mode 'min') and, with
# save_best_only=True, writes a weights file only when it improves.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]
model.fit(X, y, batch_size=128, epochs=3, callbacks=callbacks_list)

Epoch 1/3
Epoch 1: loss improved from inf to 2.97969, saving model to weights-improvement-01-2.9797.hdf5
Epoch 2/3
Epoch 2: loss improved from 2.97969 to 2.80788, saving model to weights-improvement-02-2.8079.hdf5
Epoch 3/3
Epoch 3: loss improved from 2.80788 to 2.72892, saving model to weights-improvement-03-2.7289.hdf5


<keras.callbacks.History at 0x1fe7b9a3f40>

In [None]:
import sys
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

# Load the corpus; the context manager closes the file handle as soon as the
# text has been read.
filename = "wonderland.txt"
with open(filename) as text_file:
    raw_text = text_file.read()
raw_text = raw_text.lower()
# Forward and reverse character <-> integer id lookups.
chars = sorted(set(raw_text))
char_to_int = {c: i for i, c in enumerate(chars)}
int_to_char = dict(enumerate(chars))
n_chars = len(raw_text)
n_vocab = len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)
# Rebuild the training windows: they fix the network's input/output shapes
# and provide the seed sequences for generation.
seq_length = 100
dataX = []
dataY = []
for i in range(n_chars - seq_length):
    seq_in = raw_text[i:i + seq_length]
    seq_out = raw_text[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])

n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
X = X / float(n_vocab)
y = np_utils.to_categorical(dataY)
# Same layer stack as in training so the saved weights can be loaded.
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))
filename = "weights8.hdf5"
model.load_weights(filename)
model.compile(loss='categorical_crossentropy', optimizer='adam')
# randint's upper bound is exclusive, so len(dataX) lets every pattern
# (including the last one) be chosen as the seed.
start = numpy.random.randint(0, len(dataX))
# Copy the seed so that extending it below does not modify dataX itself.
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# Greedy generation: predict the most likely next character, emit it, then
# slide the window forward by one position.
for i in range(1000):
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    pattern.append(index)
    pattern = pattern[1:]
print("\nDone.")

Total Characters:  163783
Total Vocab:  61
Total Patterns:  163683
Seed:
" e.

'she can't explain it,' said the gryphon hastily. 'go on with the next
verse.'

'but about his t "
oen ' said the morke  and the woite sart oo the woree sart oo the wooee 
and the woile sar toi wort oo the sar oo the tooee oa the sooee of the caree the was oo the tooee to the tooee th the tooee to the tooee th the tooee to the tooee th the tooee th the tooe      '                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           

In [None]:
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint, Callback
from keras.utils import np_utils
from tensorflow.keras.optimizers import Adam
from loguru import logger


class MyCallback(Callback):
    """Save the weights and log a generated text sample after every epoch.

    Args:
        path: Directory that receives the ``model_<epoch>.hdf5`` files;
            converted with ``str`` and stored as ``self.path``.
    """

    def __init__(self, path=''):
        super(MyCallback, self).__init__()
        self.path = str(path)

    def on_epoch_end(self, epoch, logs=None):
        """Save weights, generate 500 characters and log them with the loss.

        Args:
            epoch: Epoch index supplied by the caller; used in the weights
                file name and in the log message.
            logs: Optional metrics mapping. Its ``"loss"`` entry is logged;
                ``None`` is logged when the mapping or the entry is missing.
        """
        import os  # function-scope import: this cell does not import os

        # Saving fails when the target directory is missing, so create it.
        if self.path:
            os.makedirs(self.path, exist_ok=True)
        name = f'{self.path}/model_{epoch}.hdf5'
        self.model.save_weights(name, overwrite=True)

        # Rebuild the vocabulary exactly as the training script does so the
        # integer ids match the ones the network was trained on.
        filename = "wonderland.txt"
        with open(filename) as text_file:
            raw_text = text_file.read()
        raw_text = raw_text.lower()
        chars = sorted(set(raw_text))
        char_to_int = {c: i for i, c in enumerate(chars)}
        # Built locally: relying on a global int_to_char breaks whenever the
        # cell that defines it has not been run.
        int_to_char = dict(enumerate(chars))
        n_chars = len(raw_text)
        n_vocab = len(chars)
        seq_length = 100

        # Only one seed window is needed, so encode just that window instead
        # of re-encoding every training pattern on each epoch. randint's
        # upper bound is exclusive, so every valid start can be drawn.
        start = numpy.random.randint(0, n_chars - seq_length)
        pattern = [char_to_int[char]
                   for char in raw_text[start:start + seq_length]]

        generated = []
        for _ in range(500):
            x = numpy.reshape(pattern, (1, len(pattern), 1))
            x = x / float(n_vocab)
            # Use the model attached to this callback, not a global name.
            prediction = self.model.predict(x, verbose=0)
            index = int(numpy.argmax(prediction))
            generated.append(int_to_char[index])
            # Slide the window: add the prediction, drop the oldest id.
            pattern.append(index)
            pattern = pattern[1:]
        out_data = ''.join(generated)

        # logs defaults to None, so look the loss up defensively.
        loss = logs.get("loss") if logs else None
        logger.debug("Generated text:")
        logger.info(out_data)
        logger.debug("Info:")
        logger.info(f'saving model weights with loss: {loss} and epoch: {epoch}')


# Load the corpus; the context manager closes the file handle as soon as the
# text has been read.
filename = "wonderland.txt"
with open(filename) as text_file:
    raw_text = text_file.read()
raw_text = raw_text.lower()
# Sorted unique characters give every character a stable integer id.
chars = sorted(set(raw_text))
char_to_int = {c: i for i, c in enumerate(chars)}
n_chars = len(raw_text)
n_vocab = len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)
# Each window of seq_length characters is an input pattern; the character
# right after the window is its target.
seq_length = 100
dataX = []
dataY = []
for i in range(n_chars - seq_length):
    seq_in = raw_text[i:i + seq_length]
    seq_out = raw_text[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)
# Shape the inputs as [samples, time steps, features], scale the ids by the
# vocabulary size and one-hot encode the targets.
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
X = X / float(n_vocab)
y = np_utils.to_categorical(dataY)
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')
# Train for one epoch with the custom callback; "bw8" is the directory name
# handed to MyCallback for its weight files.
filepath = "bw8"
checkpoint = MyCallback(filepath)
callbacks_list = [checkpoint]
model.fit(X, y, epochs=1, batch_size=256, callbacks=callbacks_list)

Total Characters:  163783
Total Vocab:  61
Total Patterns:  163683

2022-05-03 15:54:06.317 | DEBUG    | __main__:on_epoch_end:48 - Generated text:
2022-05-03 15:54:06.318 | INFO     | __main__:on_epoch_end:49 -  ;;  ri f  ne  eid i    f;r r ;     nr                f ;      r ret   t r r; tr n  e d      ;         ;   ;;;r   tp  rr err ;    r  ; ;i rr;r  ; i r; ;;r r;r; r t e t  ;  ; ; r   r    ret  ; ;  ; i;   rtd  r  n i nt     r   n gt;     t n ;  ;; nnrt n td;r  d n   n e   ;   nr ;  g   e     r  ;rr  i            rn r      nn  ;er t ; ;  ;n ;  rn r; fr inr;i f r   n ;  r i r; ; tr  ;; r ;  ;n;    ;   h; i  ;       t;  ;             t     t       ;    r  r   i  r  tt;r t ; ;;f;t  r      ;ar  red; ;;;
2022-05-03 15:54:06.319 | DEBUG    | __main__:on_epoch_end:50 - Info:
2022-05-03 15:54:06.320 | INFO     | __main__:on_epoch_end:51 - saving model weights with loss: 4.241524696350098 and epoch: 0




<keras.callbacks.History at 0x1e313096700>

In [4]:
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint, Callback, TensorBoard
from keras.utils import np_utils
from tensorflow.keras.optimizers import Adam
from loguru import logger


class MyCallback(Callback):
    """Save the weights and log a generated text sample after every epoch.

    Args:
        path: Directory that receives the ``model_<epoch>.hdf5`` files;
            converted with ``str`` and stored as ``self.path``.
    """

    def __init__(self, path=''):
        super(MyCallback, self).__init__()
        self.path = str(path)

    def on_epoch_end(self, epoch, logs=None):
        """Save weights, generate 500 characters and log them with the loss.

        Args:
            epoch: Epoch index supplied by the caller; used in the weights
                file name and in the log message.
            logs: Optional metrics mapping. Its ``"loss"`` entry is logged;
                ``None`` is logged when the mapping or the entry is missing.
        """
        import os  # function-scope import: this cell does not import os

        # Saving fails when the target directory is missing, so create it.
        if self.path:
            os.makedirs(self.path, exist_ok=True)
        name = f'{self.path}/model_{epoch}.hdf5'
        self.model.save_weights(name, overwrite=True)

        # Rebuild the vocabulary exactly as the training script does so the
        # integer ids match the ones the network was trained on.
        filename = "wonderland.txt"
        with open(filename) as text_file:
            raw_text = text_file.read()
        raw_text = raw_text.lower()
        chars = sorted(set(raw_text))
        char_to_int = {c: i for i, c in enumerate(chars)}
        # Built locally: relying on a global int_to_char breaks whenever the
        # cell that defines it has not been run.
        int_to_char = dict(enumerate(chars))
        n_chars = len(raw_text)
        n_vocab = len(chars)
        seq_length = 100

        # Only one seed window is needed, so encode just that window instead
        # of re-encoding every training pattern on each epoch. randint's
        # upper bound is exclusive, so every valid start can be drawn.
        start = numpy.random.randint(0, n_chars - seq_length)
        pattern = [char_to_int[char]
                   for char in raw_text[start:start + seq_length]]

        generated = []
        for _ in range(500):
            x = numpy.reshape(pattern, (1, len(pattern), 1))
            x = x / float(n_vocab)
            # Use the model attached to this callback, not a global name.
            prediction = self.model.predict(x, verbose=0)
            index = int(numpy.argmax(prediction))
            generated.append(int_to_char[index])
            # Slide the window: add the prediction, drop the oldest id.
            pattern.append(index)
            pattern = pattern[1:]
        out_data = ''.join(generated)

        # logs defaults to None, so look the loss up defensively.
        loss = logs.get("loss") if logs else None
        logger.debug("Generated text:")
        logger.info(out_data)
        logger.debug("Info:")
        logger.info(f'saving model weights with loss: {loss} and epoch: {epoch}')


# Load the corpus; the context manager closes the file handle as soon as the
# text has been read.
filename = "wonderland.txt"
with open(filename) as text_file:
    raw_text = text_file.read()
raw_text = raw_text.lower()
# Sorted unique characters give every character a stable integer id.
chars = sorted(set(raw_text))
char_to_int = {c: i for i, c in enumerate(chars)}
n_chars = len(raw_text)
n_vocab = len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)
# Each window of seq_length characters is an input pattern; the character
# right after the window is its target.
seq_length = 100
dataX = []
dataY = []
for i in range(n_chars - seq_length):
    seq_in = raw_text[i:i + seq_length]
    seq_out = raw_text[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)
# Shape the inputs as [samples, time steps, features], scale the ids by the
# vocabulary size and one-hot encode the targets.
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
X = X / float(n_vocab)
y = np_utils.to_categorical(dataY)
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')
# Author's note (translated): "changed the code and altered the optimizer and
# batch size to test MyCallback".
# NOTE(review): as written, this run keeps optimizer='adam' and registers only
# the TensorBoard callback; filepath is assigned but not used below.
filepath = "bw8"
tb_callback = TensorBoard('./logs')
callbacks_list = [tb_callback]
model.fit(X, y, epochs=4, batch_size=256, callbacks=callbacks_list)

Total Characters:  163783
Total Vocab:  61
Total Patterns:  163683
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x1fe7b9398e0>

Запустить в командной строке:

    tensorboard --logdir=D:\study\intelectual-sistems\logs\train\