In [2]:
import tensorflow as tf
import tensorflow.keras as keras
from keras.models import Sequential
from keras.layers import Bidirectional, LSTM, GRU, Dense, TextVectorization, Embedding, Input
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.callbacks import ModelCheckpoint, EarlyStopping
import warnings

warnings.filterwarnings('ignore')
import os
import numpy as np
from tensorflow.keras.utils import to_categorical


path = os.path.join(os.getcwd(), 'Right.txt')

with open(path, "r", encoding="utf-8") as f:
    text = f.read()
text = text[:100000]

text_vec_layer = TextVectorization(split="whitespace", standardize="lower")
text_vec_layer.adapt([text])
encoded = text_vec_layer([text])[0]
n_tokens = text_vec_layer.vocabulary_size()

off = 3
length = off*2+1
X = np.array([np.diag(np.hstack((encoded[i:i+off], encoded[i+off+1:i+length]))) for i in range(n_tokens-length)])
Y = encoded[off:n_tokens-off-1]
print(X.shape, Y.shape, sep='\n')
Y_categorical = to_categorical(Y, num_classes=n_tokens)
model = Sequential()
model.add(Input((length-1, length-1)))
model.add(Bidirectional(GRU(64, return_sequences=True)))
model.add(Bidirectional(GRU(128, return_sequences=True)))
model.add(Bidirectional(GRU(256)))
model.add(Dense(n_tokens, activation="softmax"))

model.summary()
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])


history = model.fit(X, Y_categorical, batch_size=128, epochs=100)
model.save("/home/sv/Стільниця/Camp/Lesson33/my_model/my_model.keras")


(6961, 6, 6)
(6961,)


2024-03-20 18:21:28.652969: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 194016992 exceeds 10% of free system memory.


Epoch 1/100


2024-03-20 18:21:29.042603: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 194016992 exceeds 10% of free system memory.


[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 179ms/step - accuracy: 0.0189 - loss: 8.4116
Epoch 2/100
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 156ms/step - accuracy: 0.0338 - loss: 7.4173
Epoch 3/100
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 173ms/step - accuracy: 0.0340 - loss: 7.0776
Epoch 4/100
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 203ms/step - accuracy: 0.0315 - loss: 6.6734
Epoch 5/100
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 154ms/step - accuracy: 0.0394 - loss: 6.2320
Epoch 6/100
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 145ms/step - accuracy: 0.0432 - loss: 5.8207
Epoch 7/100
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 145ms/step - accuracy: 0.0422 - loss: 5.4647
Epoch 8/100
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 156ms/step - accuracy: 0.0621 - loss: 5.0526
Epoch 9/100
[1m55/55[0m [32m━━━━━━━━━━

In [3]:
def generate_text(seed_text, num_tokens=100, temperature=1.0):
    seed_tokens = text_vec_layer([seed_text])[0]
    tokens = tf.convert_to_tensor(seed_tokens, dtype=seed_tokens.dtype)

    for _ in range(num_tokens):
        X = np.array([np.diag(np.hstack((tokens[-off:], [0] * off)))])
        next_token_probs = model.predict(X)[0]

        next_token_probs = np.power(next_token_probs, 1.0 / temperature)
        next_token_probs = next_token_probs / np.sum(next_token_probs)

        next_token = sample(next_token_probs)
        tokens = tf.concat([tokens, [next_token]], axis=0)

    generated_text = ' '.join(text_vec_layer.get_vocabulary()[token] for token in tokens.numpy())
    return generated_text

def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)


In [15]:
seed_text = 'Вони найтісніше контактували з ельфами, маючи'
generated_text = generate_text(seed_text, num_tokens=10, temperature=0.1)
print(generated_text)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
Вони найтісніше контактували з ельфами, маючи Переґріна Торін вогнем, води, хоч хоч хоч Гаваней вибачали зберігся»,


In [16]:
eed_text = 'Вони найтісніше контактували з ельфами, маючи'
generated_text = generate_text(seed_text, num_tokens=10, temperature=1)
print(generated_text)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
Вони найтісніше контактували з ельфами, маючи Переґріна багатство. вогнем, друзів друзів друзів сцен доживали води, хоч
