**Обязательно включить поддержку GPU в среде выполнения и проверить этот момент**

In [None]:
# List every compute device TensorFlow can see; a "GPU" entry in the printed
# list confirms that the accelerator is enabled for this runtime.
from tensorflow.python.client import device_lib

local_devices = device_lib.list_local_devices()
print(local_devices)

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 2143991031410176328
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 14619377664
locality {
  bus_id: 1
  links {
  }
}
incarnation: 6851995255973677477
physical_device_desc: "device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5"
xla_global_id: 416903419
]


In [None]:
# Установка необходимых библиотек
!pip install numpy tensorflow nltk

import numpy as np
import random
import nltk
from nltk.util import ngrams
from collections import defaultdict, Counter
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
nltk.download('punkt_tab')



[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [None]:
# Загрузка данных Шекспира
!wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
with open('input.txt', 'r', encoding='utf-8') as file:
    text = file.read()
print(f"Длина текста: {len(text)} символов")
print(text[:500])  # Показать начало текста

--2025-05-31 04:47:59--  https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1115394 (1.1M) [text/plain]
Saving to: ‘input.txt’


2025-05-31 04:47:59 (55.5 MB/s) - ‘input.txt’ saved [1115394/1115394]

Длина текста: 1115394 символов
First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.

All:
We know't, we know't.

First Citizen:
Let us kill him, and we'll have corn at our own price.
Is't a verdict?

All:
No more talking on't; let it be done: away, away!

Second Citizen:
One word, go

In [None]:
## N-gram implementation (Markov chain)

def build_markov_chain(text, n=4):
    """Count which token follows each (n-1)-token context in ``text``.

    The text is split into word tokens with ``nltk.word_tokenize``; every run
    of ``n`` consecutive tokens contributes one observation, where the first
    ``n - 1`` tokens form the context and the last token is its successor.

    Args:
        text: Raw text to learn the transitions from.
        n: Size of the n-grams (context length plus one).

    Returns:
        A ``defaultdict(Counter)`` mapping each context tuple to a ``Counter``
        of the tokens observed immediately after that context.
    """
    transitions = defaultdict(Counter)

    # Walk the n-grams lazily: the leading n-1 tokens are the state,
    # the final token is the observed successor.
    for gram in ngrams(nltk.word_tokenize(text), n):
        context, successor = gram[:-1], gram[-1]
        transitions[context][successor] += 1

    return transitions

def generate_text_markov(chain, seed=None, length=50):
    """Generate text by walking a Markov chain of token transitions.

    Args:
        chain: Mapping from context tuples to a mapping of
            ``successor token -> count`` (as produced by
            ``build_markov_chain``). All keys are expected to have the same
            length; that length is taken from the first key.
        seed: Optional starting phrase. It is tokenized with
            ``nltk.word_tokenize`` and its trailing tokens are used as the
            initial state when they form a context known to the chain; the
            whole tokenized seed is then kept at the start of the result.
            If the seed is missing, too short, or ends in an unknown context,
            a random context from the chain is used instead and the seed is
            not included in the result.
        length: Maximum number of tokens to append. Generation stops early
            when the current context has no recorded successor.

    Returns:
        The tokens joined by single spaces, or an empty string when the chain
        is empty.
    """
    if not chain:
        # Nothing to sample from.
        return ''

    order = len(next(iter(chain)))  # number of tokens in every context

    state = None
    result = []
    if seed:
        # Measure and slice the seed with the same tokenizer that built the
        # chain; str.split() counts punctuation differently and would produce
        # a state of the wrong length.
        tokens = nltk.word_tokenize(seed)
        if len(tokens) >= order:
            candidate = tuple(tokens[len(tokens) - order:])
            if candidate in chain:
                state = candidate
                result = list(tokens)

    if state is None:
        state = random.choice(list(chain.keys()))
        result = list(state)

    for _ in range(length):
        successors = chain.get(state)
        if not successors:
            break
        next_word = random.choices(
            list(successors.keys()),
            weights=list(successors.values())
        )[0]
        result.append(next_word)
        # Slice from an explicit start index so a zero-length context
        # (unigram chain) stays empty instead of becoming the whole result.
        state = tuple(result[len(result) - order:])

    return ' '.join(result)

# Build the Markov model from the corpus (4-grams: a 3-token context predicts the next token)
markov_chain = build_markov_chain(text, n=4)

In [None]:
# Generate text with the n-gram (Markov chain) model
print("\nГенерация с помощью N-грамм:")
print(generate_text_markov(markov_chain, seed="First, you know Caius Marcius is chief enemy to the people", length=50))


Генерация с помощью N-грамм:
and goodness on you ! For I ne'er saw true beauty till this night . CORIOLANUS : Pray you , let 's go . GRUMIO : The note lies in 's throat , And turn 'd my captive state to liberty , My fear to hope , my sorrows unto joys , At


In [None]:
## LSTM implementation

# Prepare character-level data for the LSTM.
# Keras' Tokenizer lowercases its input by default, so the vocabulary (and
# therefore everything the model generates) is lower-case only.
tokenizer = Tokenizer(char_level=True)
tokenizer.fit_on_texts([text])
# +1 because Tokenizer indices start at 1; index 0 is never assigned to a character.
total_chars = len(tokenizer.word_index) + 1

# Slice the corpus into overlapping windows: each window of `seq_length`
# characters is an input, the character right after it is the target.
seq_length = 100
step = 3
window_starts = range(0, len(text) - seq_length, step)
sequences = [text[i:i + seq_length] for i in window_starts]
next_chars = [text[i + seq_length] for i in window_starts]

# Vectorization.
# Encode the whole corpus ONCE instead of calling the tokenizer for every
# single character of every window (tens of millions of Python calls).
encoded_text = np.asarray(tokenizer.texts_to_sequences([text])[0])
assert len(encoded_text) == len(text), "tokenizer must map every character to exactly one index"

starts = np.arange(0, len(text) - seq_length, step)
rows = np.arange(len(starts))

X = np.zeros((len(starts), seq_length, total_chars), dtype=np.bool_)
y = np.zeros((len(starts), total_chars), dtype=np.bool_)

# One-hot fill, one time step at a time: for step t, window r holds the
# character at corpus position starts[r] + t. Looping over the (short) time
# axis keeps the temporary index arrays small.
for t in range(seq_length):
    X[rows, t, encoded_text[starts + t]] = True
y[rows, encoded_text[starts + seq_length]] = True

# Build the LSTM model: one recurrent layer followed by a softmax over the vocabulary
model = Sequential([
    LSTM(128, input_shape=(seq_length, total_chars)),
    Dense(total_chars, activation='softmax')
])

model.compile(loss='categorical_crossentropy', optimizer='adam')

# Train the model (kept small for Colab)
model.fit(X, y, batch_size=128, epochs=20, verbose=1)

# Text generation with the LSTM
def generate_text_lstm(model, tokenizer, seed, length=400, temperature=1.0):
    """Extend ``seed`` by ``length`` characters sampled from the LSTM.

    Relies on the module-level ``seq_length`` and ``total_chars`` used to build
    the training tensors, and on the module-level ``sample`` function.

    Args:
        model: Trained model; ``model.predict`` receives a one-hot array of
            shape ``(1, seq_length, total_chars)`` and its first output row is
            treated as the distribution over character indices.
        tokenizer: Fitted character-level tokenizer providing
            ``texts_to_sequences`` and ``index_word``.
        seed: Starting text. Characters the tokenizer does not know are
            ignored when building the model input (they are still kept in the
            returned string).
        length: Number of characters to generate.
        temperature: Sampling temperature forwarded to ``sample``.

    Returns:
        ``seed`` followed by the generated characters.
    """
    generated = seed

    # Encode the seed once. texts_to_sequences drops characters that are not
    # in the vocabulary, so an unexpected symbol cannot raise IndexError.
    context = tokenizer.texts_to_sequences([seed])[0][-seq_length:]

    for _ in range(length):
        x = np.zeros((1, seq_length, total_chars))
        # Right-align the context: training windows are always full-length, so
        # for a short seed the real characters must be the LAST thing the LSTM
        # reads, with the zero padding in front rather than behind them.
        offset = seq_length - len(context)
        for t, char_index in enumerate(context):
            x[0, offset + t, char_index] = 1

        preds = model.predict(x, verbose=0)[0]
        # Index 0 has no entry in tokenizer.index_word, so sample only among
        # the real character indices (1..total_chars-1) and shift back.
        next_index = int(sample(preds[1:], temperature)) + 1
        next_char = tokenizer.index_word[next_index]

        generated += next_char
        context = (context + [next_index])[-seq_length:]

    return generated

def sample(preds, temperature=1.0):
    """Draw one index from a probability vector after temperature scaling.

    Args:
        preds: 1-D sequence of non-negative probabilities.
        temperature: Values below 1 sharpen the distribution, values above 1
            flatten it. A non-positive temperature selects the most probable
            index deterministically instead of dividing by zero.

    Returns:
        The sampled index as a Python ``int``.
    """
    preds = np.asarray(preds).astype('float64')

    if temperature <= 0:
        # Limit of temperature -> 0: greedy choice.
        return int(np.argmax(preds))

    # log(0) = -inf is intended here (such entries end up with probability 0),
    # so silence the divide-by-zero warning it would otherwise emit.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature

    # Shift by the maximum before exponentiating so that small temperatures
    # cannot underflow every term to 0 (which would give NaN probabilities).
    logits = logits - np.max(logits)
    exp_logits = np.exp(logits)
    probs = exp_logits / np.sum(exp_logits)

    probas = np.random.multinomial(1, probs, 1)
    return int(np.argmax(probas))

  super().__init__(**kwargs)


Epoch 1/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 9ms/step - loss: 2.5921
Epoch 2/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 8ms/step - loss: 2.0266
Epoch 3/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 8ms/step - loss: 1.8587
Epoch 4/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 8ms/step - loss: 1.7499
Epoch 5/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 8ms/step - loss: 1.6822
Epoch 6/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 8ms/step - loss: 1.6326
Epoch 7/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 8ms/step - loss: 1.5937
Epoch 8/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 8ms/step - loss: 1.5551
Epoch 9/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 8ms/step - loss: 1.5337
Epoch 10/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [None]:
# Generate text with the LSTM model
print("\nГенерация с помощью LSTM:")
print(generate_text_lstm(model, tokenizer, seed="First, you know Caius Marcius is chief enemy to the people", length=200, temperature=0.6))


Генерация с помощью LSTM:
First, you know Caius Marcius is chief enemy to the people,soeeas

aleeeeeeiieeeeeeiaeseiaseaieeooent will from her love.

lady capulet:
i have we cannot a privone of your house:
and son are you all all so mean to her,
he hath the prave in a tears where it m


In [None]:
## Interactive mode
def _ask_number(prompt, cast):
    """Keep asking ``prompt`` until the reply can be converted with ``cast``."""
    while True:
        try:
            return cast(input(prompt).strip())
        except ValueError:
            print("Некорректное число, попробуйте ещё раз.")


def interactive_generator():
    """Ask the user for a model, a seed and generation settings, then print the text.

    Prompts, in order: model choice (``1`` = Markov chain, ``2`` = LSTM), seed
    phrase, output length and, for the LSTM only, the sampling temperature.
    Invalid replies are asked again instead of raising ``ValueError`` or
    silently falling through to the LSTM branch.
    """
    print("\nИнтерактивный генератор текста")
    print("Выберите модель:")
    print("1. N-граммы (Markov Chain)")
    print("2. LSTM")

    # Accept only the two listed options; surrounding whitespace is ignored.
    choice = input("Ваш выбор (1/2): ").strip()
    while choice not in ('1', '2'):
        print("Пожалуйста, введите 1 или 2.")
        choice = input("Ваш выбор (1/2): ").strip()

    seed = input("Введите начальную фразу: ")
    length = _ask_number("Длина генерируемого текста (слов для N-грамм / символов для LSTM): ", int)

    if choice == '1':
        result = generate_text_markov(markov_chain, seed=seed, length=length)
    else:
        temperature = _ask_number("Показатель креативности (0.1-1.5): ", float)
        result = generate_text_lstm(model, tokenizer, seed=seed, length=length, temperature=temperature)

    print("\nРезультат:")
    print(result)

# Launch the interactive mode
interactive_generator()


Интерактивный генератор текста
Выберите модель:
1. N-граммы (Markov Chain)
2. LSTM
Ваш выбор (1/2): 1
Введите начальную фразу: Romeo
Длина генерируемого текста (слов для Markov Chain / символов для LSTM): 50

Результат:
thou dost suspect That I have been drinking hard all night , and is almost mature for the violent breaking out . Volsce : It is no other : show your wisdom , daughter , yours . POLIXENES : Let him thank me , that holp to send him thither ; For he
