Main chapter 13 homework

In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models, losses, optimizers
import numpy as np
from tensorflow.keras.callbacks import Callback, ModelCheckpoint, EarlyStopping

raw_text = open('/kaggle/input/dlhwch13/2600-0.txt', 'r', encoding="utf8").read()
raw_text = raw_text.lower()

print(raw_text[:200])

all_words = raw_text.split()
unique_words = list(set(all_words))
print(f'Number of unique words: {len(unique_words)}')

n_chars = len(raw_text)
print(f'Total characters: {n_chars}')

chars = sorted(list(set(raw_text)))
n_vocab = len(chars)
print(f'Total vocabulary (unique characters): {n_vocab}')
print(chars)

index_to_char = dict((i, c) for i, c in enumerate(chars))
char_to_index = dict((c, i) for i, c in enumerate(chars))
print(char_to_index)

seq_length = 160
n_seq = int(n_chars / seq_length)

X = np.zeros((n_seq, seq_length, n_vocab))
Y = np.zeros((n_seq, seq_length, n_vocab))

for i in range(n_seq):
    x_sequence = raw_text[i * seq_length: (i + 1) * seq_length]
    x_sequence_ohe = np.zeros((seq_length, n_vocab))
    for j in range(seq_length):
        char = x_sequence[j]
        index = char_to_index[char]
        x_sequence_ohe[j][index] = 1.
    X[i] = x_sequence_ohe

    y_sequence = raw_text[i * seq_length + 1: (i + 1) * seq_length + 1]
    y_sequence_ohe = np.zeros((seq_length, n_vocab))
    for j in range(seq_length):
        char = y_sequence[j]
        index = char_to_index[char]
        y_sequence_ohe[j][index] = 1.
    Y[i] = y_sequence_ohe

print(X.shape)
print(Y.shape)

tf.random.set_seed(42)
batch_size = 100
hidden_units = 700
n_epoch = 300
dropout = 0.4

model = models.Sequential()
model.add(layers.LSTM(hidden_units, input_shape=(None, n_vocab), return_sequences=True, dropout=dropout))
model.add(layers.LSTM(hidden_units, return_sequences=True, dropout=dropout))
model.add(layers.TimeDistributed(layers.Dense(n_vocab, activation='softmax')))

optimizer = optimizers.RMSprop(learning_rate=0.001)
model.compile(loss="categorical_crossentropy", optimizer=optimizer)

print(model.summary())

filepath = "weights/weights_epoch_{epoch:03d}_loss_{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')

early_stop = EarlyStopping(monitor='loss', min_delta=0, patience=50, verbose=1, mode='min')


def generate_text(model, gen_length, n_vocab, index_to_char):
    index = np.random.randint(n_vocab)
    y_char = [index_to_char[index]]
    X = np.zeros((1, gen_length, n_vocab))
    for i in range(gen_length):
        X[0, i, index] = 1.
        indices = np.argmax(model.predict(X[:, max(0, i - 99):i + 1, :])[0], 1)
        index = indices[-1]
        y_char.append(index_to_char[index])
    return ''.join(y_char)


class ResultChecker(Callback):
    def __init__(self, model, N, gen_length):
        self.model = model
        self.N = N
        self.gen_length = gen_length

    def on_epoch_end(self, epoch, logs={}):
        if epoch % self.N == 0:
            result = generate_text(self.model, self.gen_length, n_vocab, index_to_char)
            print('\nMy War and Peace:\n' + result)


result_checker = ResultChecker(model, 10, 500)

model.fit(X, Y, batch_size=batch_size, verbose=1, epochs=n_epoch, callbacks=[result_checker, checkpoint, early_stop])


﻿chapter i

“well, prince, so genoa and lucca are now just family estates of the
buonapartes. but i warn you, if you don’t tell me that this means war,
if you still try to defend the infamies and horr
Number of unique words: 40095
Total characters: 3220047
Total vocabulary (unique characters): 80
['\n', ' ', '!', '"', '$', '%', "'", '(', ')', '*', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '=', '?', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'à', 'á', 'â', 'ä', 'æ', 'ç', 'è', 'é', 'ê', 'ë', 'í', 'î', 'ï', 'ó', 'ô', 'ö', 'ú', 'ü', 'ý', 'œ', '—', '‘', '’', '“', '”', '\ufeff']
{'\n': 0, ' ': 1, '!': 2, '"': 3, '$': 4, '%': 5, "'": 6, '(': 7, ')': 8, '*': 9, ',': 10, '-': 11, '.': 12, '/': 13, '0': 14, '1': 15, '2': 16, '3': 17, '4': 18, '5': 19, '6': 20, '7': 21, '8': 22, '9': 23, ':': 24, ';': 25, '=': 26, '?': 27, 'a': 28, 'b': 29, 'c': 30, 'd': 31, 'e': 32, 'f': 3

2022-07-16 04:01:52.555620: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-16 04:01:52.671135: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-16 04:01:52.671936: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-16 04:01:52.674189: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, None, 700)         2186800   
_________________________________________________________________
lstm_1 (LSTM)                (None, None, 700)         3922800   
_________________________________________________________________
time_distributed (TimeDistri (None, None, 80)          56080     
Total params: 6,165,680
Trainable params: 6,165,680
Non-trainable params: 0
_________________________________________________________________
None


2022-07-16 04:01:57.407005: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 1030400000 exceeds 10% of free system memory.
2022-07-16 04:01:58.821666: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 1030400000 exceeds 10% of free system memory.
2022-07-16 04:02:00.005722: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 1030400000 exceeds 10% of free system memory.
2022-07-16 04:02:00.773789: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 1030400000 exceeds 10% of free system memory.
2022-07-16 04:02:01.546006: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/301


2022-07-16 04:02:04.997128: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005



My War and Peace:
à the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule the soule

Epoch 00001: loss improved from inf to 2.88968, saving model to weights/weights_epoch_001_loss_2.8897.hdf5
Epoch 2/301

Epoch 00002: loss improved from 2.88968 to 2.43457, saving model to weights/weights_epoch_002_loss_2.4346.hdf5
Epoch 3/301

Epoch 00003: loss improved from 2.43457 to 2.27391, saving model to weights/weights_epoch_003_loss_2.2739.hdf5
Epoch 4/301

Epoch 00004: loss improved from 2.27391 to 2.18102, saving model to weights/weights_epoch_004_loss_2.1810.hdf5

<keras.callbacks.History at 0x7f9530472fd0>

3 books as input data

In [2]:
import tensorflow as tf
import numpy as np
import os
import pickle
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from string import punctuation
from tensorflow.keras import layers, models, losses, optimizers
from tensorflow.keras.callbacks import Callback, ModelCheckpoint, EarlyStopping
import requests

FirstBook = open('/kaggle/input/dlhwch13/Little Women.txt', 'r', encoding="utf8").read()
SecondBook = open('/kaggle/input/dlhwch13/THE CATCHER IN THE RYE .txt', 'r', encoding="utf8").read()
ThirdBook = open('/kaggle/input/dlhwch13/THE HUNCHBACK OF NOTRE DAME.txt', 'r', encoding="utf8").read()

training_file = FirstBook + SecondBook + ThirdBook
raw_text = training_file.lower()

print(raw_text[:200])

all_words = raw_text.split()
unique_words = list(set(all_words))
print(f'Number of unique words: {len(unique_words)}')
n_chars = len(raw_text)
print(f'Total characters: {n_chars}')

chars = sorted(list(set(raw_text)))
n_vocab = len(chars)
print(f'Total vocabulary (unique characters): {n_vocab}')
print(chars)

index_to_char = dict((i, c) for i, c in enumerate(chars))
char_to_index = dict((c, i) for i, c in enumerate(chars))
print(char_to_index)

seq_length = 160
n_seq = int(n_chars / seq_length)

X = np.zeros((n_seq, seq_length, n_vocab))
Y = np.zeros((n_seq, seq_length, n_vocab))

for i in range(n_seq):
    x_sequence = raw_text[i * seq_length: (i + 1) * seq_length]
    x_sequence_ohe = np.zeros((seq_length, n_vocab))
    for j in range(seq_length):
        char = x_sequence[j]
        index = char_to_index[char]
        x_sequence_ohe[j][index] = 1.
    X[i] = x_sequence_ohe
    y_sequence = raw_text[i * seq_length + 1: (i + 1) * seq_length + 1]
    y_sequence_ohe = np.zeros((seq_length, n_vocab))
    for j in range(seq_length):
        char = y_sequence[j]
        index = char_to_index[char]
        y_sequence_ohe[j][index] = 1.
    Y[i] = y_sequence_ohe

print(X.shape)
print(Y.shape)

tf.random.set_seed(42)
batch_size = 100
hidden_units = 700
n_epoch = 301
dropout = 0.4

model = models.Sequential()
model.add(layers.LSTM(hidden_units, input_shape=(None, n_vocab), return_sequences=True, dropout=dropout))
model.add(layers.LSTM(hidden_units, return_sequences=True, dropout=dropout))
model.add(layers.TimeDistributed(layers.Dense(n_vocab, activation='softmax')))

optimizer = optimizers.RMSprop(learning_rate=0.001)
model.compile(loss="categorical_crossentropy", optimizer=optimizer)

print(model.summary())

filepath = "weights/weights_epoch_{epoch:03d}_loss_{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')

early_stop = EarlyStopping(monitor='loss', min_delta=0, patience=50, verbose=1, mode='min')


def generate_text(model, gen_length, n_vocab, index_to_char):
    index = np.random.randint(n_vocab)
    y_char = [index_to_char[index]]
    X = np.zeros((1, gen_length, n_vocab))
    for i in range(gen_length):
        X[0, i, index] = 1.
        indices = np.argmax(model.predict(X[:, max(0, i - 99):i + 1, :])[0], 1)
        index = indices[-1]
        y_char.append(index_to_char[index])
    return ''.join(y_char)


class ResultChecker(Callback):
    def __init__(self, model, N, gen_length):
        self.model = model
        self.N = N
        self.gen_length = gen_length

    def on_epoch_end(self, epoch, logs={}):
        if epoch % self.N == 0:
            result = generate_text(self.model, self.gen_length, n_vocab, index_to_char)
            print('\nMy result with 3 selected books:\n' + result)


result_checker = ResultChecker(model, 10, 500)

model.fit(X, Y, batch_size=batch_size, verbose=1, epochs=n_epoch, callbacks=[result_checker, checkpoint, early_stop])

﻿“christmas won’t be christmas without any presents,” grumbled jo, lying
on the rug.

“it’s so dreadful to be poor!” sighed meg, looking down at her old
dress.

“i don’t think it’s fair for some girls
Number of unique words: 41601
Total characters: 2480374
Total vocabulary (unique characters): 101
['\n', ' ', '!', '"', '$', '%', '&', "'", '(', ')', '*', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '?', '@', '[', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'à', 'á', 'â', 'æ', 'ç', 'è', 'é', 'ê', 'ë', 'î', 'ï', 'ñ', 'ô', 'ù', 'œ', 'α', 'β', 'γ', 'δ', 'ε', 'η', 'ι', 'κ', 'λ', 'μ', 'ν', 'ο', 'ς', 'τ', 'φ', 'ἀ', 'ὀ', 'ά', 'έ', 'ί', 'ό', 'ῖ', '—', '‘', '’', '“', '”', '\ufeff']
{'\n': 0, ' ': 1, '!': 2, '"': 3, '$': 4, '%': 5, '&': 6, "'": 7, '(': 8, ')': 9, '*': 10, ',': 11, '-': 12, '.': 13, '/': 14, '0': 15, '1': 16, '2': 17, '3': 18, '4': 19, '5': 20, '6': 21,

2022-07-16 07:00:54.065644: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 1002049280 exceeds 10% of free system memory.


Epoch 1/301

My result with 3 selected books:
] tit in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it in t it

Epoch 00001: loss improved from inf to 2.98441, saving model to weights/weights_epoch_001_loss_2.9844.hdf5
Epoch 2/301

Epoch 00002: loss improved from 2.98441 to 2.55467, saving model to weights/weights_epoch_002_loss_2.5547.hdf5
Epoch 3/301

Epoch 00003: loss improved from 2.55467 to 2.40763, saving model to weights/weights_epoch_003_loss_2.4076.hdf5
Epoch 4/301

Epoch 00004: loss improved from 2.40763 to 2.32155, saving model to weights/weights

<keras.callbacks.History at 0x7f95303fac90>

Farsi book as input data

In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models, losses, optimizers
import numpy as np
from tensorflow.keras.callbacks import Callback, ModelCheckpoint, EarlyStopping

raw_text = open('/kaggle/input/dlhwch13/BoofKoor.txt', 'r', encoding="utf8").read()
raw_text = raw_text.lower()

print(raw_text[:200])

all_words = raw_text.split()
unique_words = list(set(all_words))
print(f'Number of unique words: {len(unique_words)}')
n_chars = len(raw_text)
print(f'Total characters: {n_chars}')

chars = sorted(list(set(raw_text)))
n_vocab = len(chars)
print(f'Total vocabulary (unique characters): {n_vocab}')
print(chars)

index_to_char = dict((i, c) for i, c in enumerate(chars))
char_to_index = dict((c, i) for i, c in enumerate(chars))
print(char_to_index)

seq_length = 160
n_seq = int(n_chars / seq_length)

X = np.zeros((n_seq, seq_length, n_vocab))
Y = np.zeros((n_seq, seq_length, n_vocab))

for i in range(n_seq):
    x_sequence = raw_text[i * seq_length: (i + 1) * seq_length]
    x_sequence_ohe = np.zeros((seq_length, n_vocab))
    for j in range(seq_length):
        char = x_sequence[j]
        index = char_to_index[char]
        x_sequence_ohe[j][index] = 1.
    X[i] = x_sequence_ohe
    y_sequence = raw_text[i * seq_length + 1: (i + 1) * seq_length + 1]
    y_sequence_ohe = np.zeros((seq_length, n_vocab))
    for j in range(seq_length):
        char = y_sequence[j]
        index = char_to_index[char]
        y_sequence_ohe[j][index] = 1.
    Y[i] = y_sequence_ohe

print(X.shape)
print(Y.shape)

tf.random.set_seed(42)
batch_size = 100
hidden_units = 700
n_epoch = 301
dropout = 0.4

model = models.Sequential()
model.add(layers.LSTM(hidden_units, input_shape=(None, n_vocab), return_sequences=True, dropout=dropout))
model.add(layers.LSTM(hidden_units, return_sequences=True, dropout=dropout))
model.add(layers.TimeDistributed(layers.Dense(n_vocab, activation='softmax')))

optimizer = optimizers.RMSprop(learning_rate=0.001)
model.compile(loss="categorical_crossentropy", optimizer=optimizer)

print(model.summary())

filepath = "weights/weights_epoch_{epoch:03d}_loss_{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')

early_stop = EarlyStopping(monitor='loss', min_delta=0, patience=50, verbose=1, mode='min')


def generate_text(model, gen_length, n_vocab, index_to_char):
    index = np.random.randint(n_vocab)
    y_char = [index_to_char[index]]
    X = np.zeros((1, gen_length, n_vocab))
    for i in range(gen_length):
        X[0, i, index] = 1.
        indices = np.argmax(model.predict(X[:, max(0, i - 99):i + 1, :])[0], 1)
        index = indices[-1]
        y_char.append(index_to_char[index])
    return ''.join(y_char)


class ResultChecker(Callback):
    def __init__(self, model, N, gen_length):
        self.model = model
        self.N = N
        self.gen_length = gen_length

    def on_epoch_end(self, epoch, logs={}):
        if epoch % self.N == 0:
            result = generate_text(self.model, self.gen_length, n_vocab, index_to_char)
            print('\nMy Boof kour:\n' + result)


result_checker = ResultChecker(model, 10, 500)

model.fit(X, Y, batch_size=batch_size, verbose=1, epochs=n_epoch, callbacks=[result_checker, checkpoint, early_stop])


بوف کور
در زندگی زخمهایی هست که مثل خوره در انزوا روح را آهسته می خورد و می تراشد. این دردها را نمی شود به کسی اظهار کرد، چون عموما عادت دارند که این دردهای باورنکردنی را جزو اتفاقات و پیشامدهای نادر 
Number of unique words: 4821
Total characters: 132864
Total vocabulary (unique characters): 57
['\n', ' ', '!', '(', ')', '-', '.', ':', '|', '«', '»', '،', '؛', '؟', 'ء', 'آ', 'أ', 'ؤ', 'ئ', 'ا', 'ب', 'ة', 'ت', 'ث', 'ج', 'ح', 'خ', 'د', 'ذ', 'ر', 'ز', 'س', 'ش', 'ص', 'ض', 'ط', 'ظ', 'ع', 'غ', 'ف', 'ق', 'ل', 'م', 'ن', 'ه', 'و', 'ى', 'پ', 'چ', 'ژ', 'ک', 'گ', 'ہ', 'ۂ', 'ی', 'ې', '۔']
{'\n': 0, ' ': 1, '!': 2, '(': 3, ')': 4, '-': 5, '.': 6, ':': 7, '|': 8, '«': 9, '»': 10, '،': 11, '؛': 12, '؟': 13, 'ء': 14, 'آ': 15, 'أ': 16, 'ؤ': 17, 'ئ': 18, 'ا': 19, 'ب': 20, 'ة': 21, 'ت': 22, 'ث': 23, 'ج': 24, 'ح': 25, 'خ': 26, 'د': 27, 'ذ': 28, 'ر': 29, 'ز': 30, 'س': 31, 'ش': 32, 'ص': 33, 'ض': 34, 'ط': 35, 'ظ': 36, 'ع': 37, 'غ': 38, 'ف': 39, 'ق': 40, 'ل': 41, 'م': 42, 'ن': 43, 'ه': 44, 'و': 45, 'ى': 46, 'پ

2022-07-16 10:05:59.482527: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-16 10:05:59.663704: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-16 10:05:59.664857: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-16 10:05:59.666743: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, None, 700)         2122400   
_________________________________________________________________
lstm_1 (LSTM)                (None, None, 700)         3922800   
_________________________________________________________________
time_distributed (TimeDistri (None, None, 57)          39957     
Total params: 6,085,157
Trainable params: 6,085,157
Non-trainable params: 0
_________________________________________________________________
None


2022-07-16 10:06:04.715328: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/301


2022-07-16 10:06:08.969593: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005



My Boof kour:
ک                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    

Epoch 00001: loss improved from inf to 3.89140, saving model to weights/weights_epoch_001_loss_3.8914.hdf5
Epoch 2/301

Epoch 00002: loss improved from 3.89140 to 2.97425, saving model to weights/weights_epoch_002_loss_2.9743.hdf5
Epoch 3/301

Epoch 00003: loss did not improve from 2.97425
Epoch 4/301

Epoch 00004: loss did not improve from 2.97425
Epoch 5/301

Epoch 00005: loss did not improve from 2.97425
Epoch 6/301

Epoch 00006: loss improved from 2.97425 to 2.96186, saving

<keras.callbacks.History at 0x7fda6847fdd0>

famous Farsi Poet work collection as input data

In [3]:
import tensorflow as tf
from tensorflow.keras import layers, models, losses, optimizers
import numpy as np
from tensorflow.keras.callbacks import Callback, ModelCheckpoint, EarlyStopping

raw_text = open('/kaggle/input/dlhwch13/roodaki_norm.txt', 'r', encoding="utf8").read()
raw_text = raw_text.lower()

print(raw_text[:200])

all_words = raw_text.split()
unique_words = list(set(all_words))
print(f'Number of unique words: {len(unique_words)}')
n_chars = len(raw_text)
print(f'Total characters: {n_chars}')

chars = sorted(list(set(raw_text)))
n_vocab = len(chars)
print(f'Total vocabulary (unique characters): {n_vocab}')
print(chars)

index_to_char = dict((i, c) for i, c in enumerate(chars))
char_to_index = dict((c, i) for i, c in enumerate(chars))
print(char_to_index)

seq_length = 160
n_seq = int(n_chars / seq_length)

X = np.zeros((n_seq, seq_length, n_vocab))
Y = np.zeros((n_seq, seq_length, n_vocab))

for i in range(n_seq):
    x_sequence = raw_text[i * seq_length: (i + 1) * seq_length]
    x_sequence_ohe = np.zeros((seq_length, n_vocab))
    for j in range(seq_length):
        char = x_sequence[j]
        index = char_to_index[char]
        x_sequence_ohe[j][index] = 1.
    X[i] = x_sequence_ohe
    y_sequence = raw_text[i * seq_length + 1: (i + 1) * seq_length + 1]
    y_sequence_ohe = np.zeros((seq_length, n_vocab))
    for j in range(seq_length):
        char = y_sequence[j]
        index = char_to_index[char]
        y_sequence_ohe[j][index] = 1.
    Y[i] = y_sequence_ohe

print(X.shape)
print(Y.shape)

tf.random.set_seed(42)
batch_size = 100
hidden_units = 700
n_epoch = 301
dropout = 0.4

model = models.Sequential()
model.add(layers.LSTM(hidden_units, input_shape=(None, n_vocab), return_sequences=True, dropout=dropout))
model.add(layers.LSTM(hidden_units, return_sequences=True, dropout=dropout))
model.add(layers.TimeDistributed(layers.Dense(n_vocab, activation='softmax')))

optimizer = optimizers.RMSprop(learning_rate=0.001)
model.compile(loss="categorical_crossentropy", optimizer=optimizer)

print(model.summary())

filepath = "weights/weights_epoch_{epoch:03d}_loss_{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')

early_stop = EarlyStopping(monitor='loss', min_delta=0, patience=50, verbose=1, mode='min')


def generate_text(model, gen_length, n_vocab, index_to_char):
    index = np.random.randint(n_vocab)
    y_char = [index_to_char[index]]
    X = np.zeros((1, gen_length, n_vocab))
    for i in range(gen_length):
        X[0, i, index] = 1.
        indices = np.argmax(model.predict(X[:, max(0, i - 99):i + 1, :])[0], 1)
        index = indices[-1]
        y_char.append(index_to_char[index])
    return ''.join(y_char)


class ResultChecker(Callback):
    def __init__(self, model, N, gen_length):
        self.model = model
        self.N = N
        self.gen_length = gen_length

    def on_epoch_end(self, epoch, logs={}):
        if epoch % self.N == 0:
            result = generate_text(self.model, self.gen_length, n_vocab, index_to_char)
            print('\nMy Roodaki colection:\n' + result)


result_checker = ResultChecker(model, 10, 500)

model.fit(X, Y, batch_size=batch_size, verbose=1, epochs=n_epoch, callbacks=[result_checker, checkpoint, early_stop])


گر من این دوستی تو ببرم تا لب گور
بزنم نعره ولیکن ز تو بینم هنرا
اثر میر نخواهم که بماند به جهان
میر خواهم که بماند به جهان در اثرا
هر که را رفت همی باید رفته شمری
هر که را مرد همی باید مرده شمرا
به ح
Number of unique words: 3868
Total characters: 59205
Total vocabulary (unique characters): 35
['\n', ' ', 'آ', 'ا', 'ب', 'ت', 'ث', 'ج', 'ح', 'خ', 'د', 'ذ', 'ر', 'ز', 'س', 'ش', 'ص', 'ض', 'ط', 'ظ', 'ع', 'غ', 'ف', 'ق', 'ل', 'م', 'ن', 'ه', 'و', 'پ', 'چ', 'ژ', 'ک', 'گ', 'ی']
{'\n': 0, ' ': 1, 'آ': 2, 'ا': 3, 'ب': 4, 'ت': 5, 'ث': 6, 'ج': 7, 'ح': 8, 'خ': 9, 'د': 10, 'ذ': 11, 'ر': 12, 'ز': 13, 'س': 14, 'ش': 15, 'ص': 16, 'ض': 17, 'ط': 18, 'ظ': 19, 'ع': 20, 'غ': 21, 'ف': 22, 'ق': 23, 'ل': 24, 'م': 25, 'ن': 26, 'ه': 27, 'و': 28, 'پ': 29, 'چ': 30, 'ژ': 31, 'ک': 32, 'گ': 33, 'ی': 34}
(370, 160, 35)
(370, 160, 35)
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_4 (LSTM)                (None, N

<keras.callbacks.History at 0x7fda79786590>