<a href="https://colab.research.google.com/github/PhParsa/PhParsa/blob/main/Another_copy_of_Parsashahname_ipynb_txt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This is a companion notebook for the book [Deep Learning with Python, Third Edition](https://www.manning.com/books/deep-learning-with-python-third-edition). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.

**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**

The book's contents are available online at [deeplearningwithpython.io](https://deeplearningwithpython.io).

In [None]:
!pip install keras keras-hub --upgrade -q

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.5 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━[0m [32m0.8/1.5 MB[0m [31m22.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m25.3 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m61.9 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
keras-nlp 0.21.1 requires keras-hub==0.21.1, but you have keras-hub 0.23.0 which is incompatible.[0m[31m
[0m

In [None]:
import os
# Choose the Keras backend for this session. This assignment sits above the
# first `import keras` in the notebook, and the %%backend helper's message
# states that the runtime must be restarted after changing it.
os.environ["KERAS_BACKEND"] = "jax"

In [None]:
# @title
import os
from IPython.core.magic import register_cell_magic

@register_cell_magic
def backend(line, cell):
    """Cell magic that runs `cell` only under the required Keras backend.

    Usage: ``%%backend <name>`` on the first line of a cell. The last
    whitespace-separated word of `line` is taken as the required backend and
    compared with the ``KERAS_BACKEND`` environment variable.

    Args:
        line: Text following ``%%backend`` on the magic line.
        cell: Source of the cell body to execute.

    Prints a hint instead of running the cell when the backends differ or
    when no backend name was given.
    """
    words = line.split()
    if not words:
        # A bare `%%backend` used to fail with IndexError on `line.split()[-1]`.
        print("Usage: %%backend <name> (for example: %%backend jax)")
        return

    required = words[-1]
    current = os.environ.get("KERAS_BACKEND", "")
    if current != required:
        print(
            f"This cell requires the {required} backend. To run it, change KERAS_BACKEND to "
            f"\"{required}\" at the top of the notebook, restart the runtime, and rerun the notebook."
        )
        return

    get_ipython().run_cell(cell)

## Language models and the Transformer

### The language model

#### Training a Shakespeare language model

In [None]:
import keras

# Download the corpus (the call returns the local file path).
filename = keras.utils.get_file(
    origin=(
        "https://storage.googleapis.com/download.tensorflow.org/"
        "data/shakespeare.txt"
    ),
)
# Read through a context manager so the file handle is closed deterministically,
# and name the encoding explicitly instead of relying on the platform default.
with open(filename, "r", encoding="utf-8") as corpus_file:
    shakespeare = corpus_file.read()

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt
[1m1115394/1115394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
print(shakespeare[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [None]:
import tensorflow as tf

sequence_length = 100

def split_input(input, sequence_length):
    """Yield consecutive, non-overlapping slices of `input`.

    Each slice holds `sequence_length` items, except possibly the last one,
    which holds whatever remains. Works on anything that supports `len()` and
    slicing (str, bytes, list, ...).
    """
    offsets = range(0, len(input), sequence_length)
    yield from (input[offset : offset + sequence_length] for offset in offsets)

# Inputs are the text without its last character; targets are the same text
# shifted forward by one position, so chunk i of `labels` is chunk i of
# `features` advanced by one (next-character prediction).
features = list(split_input(shakespeare[:-1], sequence_length))
labels = list(split_input(shakespeare[1:], sequence_length))
# Pair every input chunk with its target chunk.
dataset = tf.data.Dataset.from_tensor_slices((features, labels))

In [None]:
x, y = next(dataset.as_numpy_iterator())
x[:50], y[:50]

(b'First Citizen:\nBefore we proceed any further, hear',
 b'irst Citizen:\nBefore we proceed any further, hear ')

In [None]:
from keras import layers

# Character-level tokenizer: the text is used as-is (no standardization) and
# split into individual characters; every example is padded or truncated to
# `sequence_length` token ids.
tokenizer = layers.TextVectorization(
    standardize=None,
    split="character",
    output_sequence_length=sequence_length,
)
# Build the vocabulary from the input side only (the label chunks are dropped).
tokenizer.adapt(dataset.map(lambda text, labels: text))

In [None]:
vocabulary_size = tokenizer.vocabulary_size()
vocabulary_size

67

In [None]:
# Tokenize both the inputs and the targets. The result gets its own name so
# that re-running this cell does not push already-tokenized data back through
# the tokenizer.
tokenized_dataset = dataset.map(
    lambda features, labels: (tokenizer(features), tokenizer(labels)),
    num_parallel_calls=8,
)
# Cache *before* shuffling: a tf.data cache replays exactly what it stored, so
# caching after shuffle/batch would freeze one batch order for every epoch.
# With this order the tokenized examples are cached once and reshuffled on
# each pass.
training_data = tokenized_dataset.cache().shuffle(10_000).batch(64)

In [None]:
a,b = next(iter(training_data))

In [None]:
embedding_dim = 256  # size of each token embedding vector
hidden_dim = 1024  # number of GRU units

# One integer token id per position, `sequence_length` positions per example.
inputs = layers.Input(shape=(sequence_length,), dtype="int", name="token_ids")
x = layers.Embedding(vocabulary_size, embedding_dim)(inputs)
# return_sequences=True keeps the GRU output at every position, so a next
# token can be predicted for each one.
x = layers.GRU(hidden_dim, return_sequences=True)(x)
x = layers.Dropout(0.1)(x)
# Per-position probability distribution over the vocabulary.
outputs = layers.Dense(vocabulary_size, activation="softmax")(x)
model = keras.Model(inputs, outputs)

In [None]:
model.summary(line_length=80)

In [None]:
# Targets are integer token ids, hence the "sparse" loss and metric.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["sparse_categorical_accuracy"],
)
# Keep the History object: the cell no longer ends by echoing its repr, and
# the per-epoch metrics stay available for later inspection.
history = model.fit(training_data, epochs=20)

Epoch 1/20
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 92ms/step - loss: 2.7234 - sparse_categorical_accuracy: 0.2800
Epoch 2/20
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 70ms/step - loss: 2.0036 - sparse_categorical_accuracy: 0.4131
Epoch 3/20
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 71ms/step - loss: 1.7371 - sparse_categorical_accuracy: 0.4843
Epoch 4/20
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 71ms/step - loss: 1.5846 - sparse_categorical_accuracy: 0.5250
Epoch 5/20
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 70ms/step - loss: 1.4902 - sparse_categorical_accuracy: 0.5496
Epoch 6/20
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 70ms/step - loss: 1.4239 - sparse_categorical_accuracy: 0.5667
Epoch 7/20
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 70ms/step - loss: 1.3731 - sparse_categorical_accuracy: 0.5794
Epoch 8/20
[

<keras.src.callbacks.history.History at 0x79d480d140e0>

#### Generating Shakespeare

In [None]:
# Single-step version of the network for text generation: it consumes one
# token id plus the current GRU state and returns the next-token
# probabilities together with the updated state.
inputs = keras.Input(shape=(1,), dtype="int", name="token_ids")
input_state = keras.Input(shape=(hidden_dim,), name="state")

x = layers.Embedding(vocabulary_size, embedding_dim)(inputs)
# return_state=True also returns the GRU's final state so it can be fed back
# in on the next step.
x, output_state = layers.GRU(hidden_dim, return_state=True)(
    x, initial_state=input_state
)
outputs = layers.Dense(vocabulary_size, activation="softmax")(x)
generation_model = keras.Model(
    inputs=(inputs, input_state),
    outputs=(outputs, output_state),
)
# Reuse the trained weights. NOTE(review): this presumably relies on both
# models listing their weights in the same order — confirm if either
# architecture changes.
generation_model.set_weights(model.get_weights())

In [None]:
# Lookup tables between vocabulary entries and their integer ids.
tokens = tokenizer.get_vocabulary()
token_ids = range(vocabulary_size)
char_to_id = {token: index for token, index in zip(tokens, token_ids)}
id_to_char = {index: token for index, token in zip(token_ids, tokens)}

prompt = """
KING RICHARD III:
"""

In [None]:
# Map prompt characters to token ids. Characters that are not in the
# vocabulary fall back to the "[UNK]" entry instead of raising KeyError.
unk_id = char_to_id["[UNK]"]
input_ids = [char_to_id.get(c, unk_id) for c in prompt]
# Start from an all-zero recurrent state for a batch of one.
state = keras.ops.zeros(shape=(1, hidden_dim))
# Feed the prompt one token at a time; only the last `predictions` and the
# accumulated `state` are kept for the generation step.
for token_id in input_ids:
    inputs = keras.ops.expand_dims([token_id], axis=0)
    predictions, state = generation_model.predict((inputs, state), verbose=0)

In [None]:
import numpy as np

# Continue from the `predictions` and `state` left by the prompt-priming cell.
generated_ids = []
max_length = 250  # number of tokens to generate after the prompt
for i in range(max_length):
    # Greedy decoding: take the highest-probability token id from the latest
    # prediction.
    next_char = int(np.argmax(predictions, axis=-1)[0])
    generated_ids.append(next_char)
    # Feed the chosen token back in, carrying the recurrent state forward.
    inputs = keras.ops.expand_dims([next_char], axis=0)
    predictions, state = generation_model.predict((inputs, state), verbose=0)

In [None]:
output = "".join([id_to_char[token_id] for token_id in generated_ids])
print(prompt + output)


KING RICHARD III:
Say the truth of the death of the dead brother's life,
And see him dead, some reason which the heavens,
Which seem'd in the night of his death,
And bring thee to thy head;
And so defend your highness to the banish'd youth
To thee again, and will not 


### Let's try with Shahnameh!

In [None]:
!wget -q https://github.com/Alireza-Akhavan/rnn-notebooks/raw/refs/heads/master/shahnameh.txt

In [None]:
# Read the poem as text, not bytes. Persian characters occupy several bytes in
# UTF-8, so cutting raw bytes into fixed-size chunks (as the following cells
# do with `split_input`) would split characters in half, and the one-position
# label shift would advance by a byte instead of by a character.
# errors="replace" keeps the cell tolerant of any stray invalid bytes in the
# download; the context manager closes the file handle.
with open("shahnameh.txt", "r", encoding="utf-8", errors="replace") as poem_file:
    shahnameh = poem_file.read()
shahnameh[:20]

'|به نام خدا'

In [None]:
import tensorflow as tf

sequence_length = 100

def split_input(input, sequence_length):
    """Generate back-to-back slices of `input`, `sequence_length` items each.

    The final slice is shorter when `len(input)` is not a multiple of
    `sequence_length`. Any sliceable object with a length is accepted.
    """
    for start in range(0, len(input), sequence_length):
        stop = start + sequence_length
        yield input[start:stop]

# Inputs drop the last element of the corpus; targets are the same corpus
# shifted forward by one position, so chunk i of `labels` is chunk i of
# `features` advanced by one.
features = list(split_input(shahnameh[:-1], sequence_length))
labels = list(split_input(shahnameh[1:], sequence_length))
# Pair every input chunk with its target chunk.
dataset = tf.data.Dataset.from_tensor_slices((features, labels))

In [None]:
x, y = next(dataset.as_numpy_iterator())
print(x[1:12].decode('utf-8', errors='ignore'))
print(y[1:12].decode('utf-8', errors='ignore'))

به نام
ه نام 


In [None]:
from keras import layers

# Character-level tokenizer: the text is used as-is (no standardization) and
# split into individual characters; every example is padded or truncated to
# `sequence_length` token ids.
tokenizer = layers.TextVectorization(
    standardize=None,
    split="character",
    output_sequence_length=sequence_length,
)
# Build the vocabulary from the input side only (the label chunks are dropped).
tokenizer.adapt(dataset.map(lambda text, labels: text))

In [None]:
vocabulary_size = tokenizer.vocabulary_size()
vocabulary_size

51

In [None]:
tokenizer.get_vocabulary()

['',
 '[UNK]',
 np.str_(' '),
 np.str_('ا'),
 np.str_('ر'),
 np.str_('ن'),
 np.str_('د'),
 np.str_('ی'),
 np.str_('و'),
 np.str_('ه'),
 np.str_('ب'),
 np.str_('|'),
 np.str_('\n'),
 np.str_('م'),
 np.str_('ت'),
 np.str_('ش'),
 np.str_('ز'),
 np.str_('س'),
 np.str_('ک'),
 np.str_('گ'),
 np.str_('�'),
 np.str_('خ'),
 np.str_('پ'),
 np.str_('ف'),
 np.str_('چ'),
 np.str_('ج'),
 np.str_('آ'),
 np.str_('ل'),
 np.str_('\u200c'),
 np.str_('غ'),
 np.str_('ژ'),
 np.str_('ق'),
 np.str_('ذ'),
 np.str_('ٔ'),
 np.str_('ع'),
 np.str_('ص'),
 np.str_('ط'),
 np.str_('ح'),
 np.str_('ث'),
 np.str_('ض'),
 np.str_('ظ'),
 np.str_('ئ'),
 np.str_('،'),
 np.str_('؟'),
 np.str_(')'),
 np.str_('('),
 np.str_('أ'),
 np.str_('ء'),
 np.str_('»'),
 np.str_('«'),
 np.str_('ؤ')]

In [None]:
# Tokenize both the inputs and the targets. The result gets its own name so
# that re-running this cell does not push already-tokenized data back through
# the tokenizer.
tokenized_dataset = dataset.map(
    lambda features, labels: (tokenizer(features), tokenizer(labels)),
    num_parallel_calls=8,
)
# Cache *before* shuffling: a tf.data cache replays exactly what it stored, so
# caching after shuffle/batch would freeze one batch order for every epoch.
# With this order the tokenized examples are cached once and reshuffled on
# each pass.
training_data = tokenized_dataset.cache().shuffle(10_000).batch(64)

In [None]:
a,b = next(iter(training_data))

In [None]:
embedding_dim = 256  # size of each token embedding vector
hidden_dim = 1024  # number of GRU units

# One integer token id per position, `sequence_length` positions per example.
inputs = layers.Input(shape=(sequence_length,), dtype="int", name="token_ids")
x = layers.Embedding(vocabulary_size, embedding_dim)(inputs)
# return_sequences=True keeps the GRU output at every position, so a next
# token can be predicted for each one.
x = layers.GRU(hidden_dim, return_sequences=True)(x)
x = layers.Dropout(0.1)(x)
# Per-position probability distribution over the vocabulary.
outputs = layers.Dense(vocabulary_size, activation="softmax")(x)
model = keras.Model(inputs, outputs)

In [None]:
model.summary(line_length=80)

In [None]:
# Targets are integer token ids, hence the "sparse" loss and metric.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["sparse_categorical_accuracy"],
)
# Keep the History object: the cell no longer ends by echoing its repr, and
# the per-epoch metrics stay available for later inspection.
history = model.fit(training_data, epochs=20)

Epoch 1/20
[1m728/728[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 74ms/step - loss: 1.2070 - sparse_categorical_accuracy: 0.6594
Epoch 2/20
[1m728/728[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 71ms/step - loss: 0.5840 - sparse_categorical_accuracy: 0.8303
Epoch 3/20
[1m728/728[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 71ms/step - loss: 0.4998 - sparse_categorical_accuracy: 0.8500
Epoch 4/20
[1m728/728[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 71ms/step - loss: 0.4655 - sparse_categorical_accuracy: 0.8588
Epoch 5/20
[1m728/728[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 71ms/step - loss: 0.4435 - sparse_categorical_accuracy: 0.8649
Epoch 6/20
[1m728/728[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 71ms/step - loss: 0.4252 - sparse_categorical_accuracy: 0.8700
Epoch 7/20
[1m728/728[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 70ms/step - loss: 0.4083 - sparse_categorical_accuracy: 0.8748
Epoch 8/20
[

<keras.src.callbacks.history.History at 0x7933862571a0>

In [None]:
# Single-step version of the network for text generation: it consumes one
# token id plus the current GRU state and returns the next-token
# probabilities together with the updated state.
inputs = keras.Input(shape=(1,), dtype="int", name="token_ids")
input_state = keras.Input(shape=(hidden_dim,), name="state")

x = layers.Embedding(vocabulary_size, embedding_dim)(inputs)
# return_state=True also returns the GRU's final state so it can be fed back
# in on the next step.
x, output_state = layers.GRU(hidden_dim, return_state=True)(
    x, initial_state=input_state
)
outputs = layers.Dense(vocabulary_size, activation="softmax")(x)
generation_model = keras.Model(
    inputs=(inputs, input_state),
    outputs=(outputs, output_state),
)
# Reuse the trained weights. NOTE(review): this presumably relies on both
# models listing their weights in the same order — confirm if either
# architecture changes.
generation_model.set_weights(model.get_weights())

In [None]:
# Lookup tables between vocabulary entries and their integer ids.
tokens = tokenizer.get_vocabulary()
token_ids = range(vocabulary_size)
char_to_id = {token: index for token, index in zip(tokens, token_ids)}
id_to_char = {index: token for index, token in zip(token_ids, tokens)}

prompt = """
پارسا پورهمدانی نامی را
"""

In [None]:
# Map prompt characters to token ids. Characters that are not in the
# vocabulary fall back to the "[UNK]" entry instead of raising KeyError.
unk_id = char_to_id["[UNK]"]
input_ids = [char_to_id.get(c, unk_id) for c in prompt]
# Start from an all-zero recurrent state for a batch of one.
state = keras.ops.zeros(shape=(1, hidden_dim))
# Feed the prompt one token at a time; only the last `predictions` and the
# accumulated `state` are kept for the generation step.
for token_id in input_ids:
    inputs = keras.ops.expand_dims([token_id], axis=0)
    predictions, state = generation_model.predict((inputs, state), verbose=0)

In [None]:
import numpy as np

# Continue from the `predictions` and `state` left by the prompt-priming cell.
generated_ids = []
max_length = 250  # number of tokens to generate after the prompt
for i in range(max_length):
    # Greedy decoding: take the highest-probability token id from the latest
    # prediction.
    next_char = int(np.argmax(predictions, axis=-1)[0])
    generated_ids.append(next_char)
    # Feed the chosen token back in, carrying the recurrent state forward.
    inputs = keras.ops.expand_dims([next_char], axis=0)
    predictions, state = generation_model.predict((inputs, state), verbose=0)

In [None]:
output = "".join([id_to_char[token_id] for token_id in generated_ids])
print(prompt + output)


پارسا نامی
|به دانایی او سال نرای
|ازان پس نبین�


In [None]:
output = "".join([id_to_char[token_id] for token_id in generated_ids])
print(prompt + output)


پارسا پورهمدانی
|که این باد باید بدین بارگاه
|کجا آن سخن پ�


In [None]:

output = "".join([id_to_char[token_id] for token_id in generated_ids])
print(prompt + output)


پارسا
|نهادی به دانا به دست
|که بی‌تو بباشید با م�


In [None]:

output = "".join([id_to_char[token_id] for token_id in generated_ids])
print(prompt + output)


رستم و پارسا
|نهادی به دانا به دست
|که بی‌تو بباشید با م�


In [None]:


output = "".join([id_to_char[token_id] for token_id in generated_ids])
print(prompt + output)


پارسا را
|نهادی به دانا به دست
|که بی‌تو بباشید با م�


In [None]:


output = "".join([id_to_char[token_id] for token_id in generated_ids])
print(prompt + output)


پارسا نامی را
|نهادی به دانا به دست
|که بی‌تو بباشید با م�


In [None]:


output = "".join([id_to_char[token_id] for token_id in generated_ids])
print(prompt + output)


پارسا پورهمدانی نامی را
|بدانست کان اختر شا�


In [None]:


output = "".join([id_to_char[token_id] for token_id in generated_ids])
print(prompt + output)


پارسا پورهمدانی نامی را
|بدانست کان اختر شاه �


In [None]:
# Deeper variant: two stacked LSTM layers with dropout, followed by layer
# normalization, in place of the single GRU.
# NOTE: this rebinds `model`. The generation cell earlier in the notebook
# builds a single-GRU network and copies `model`'s weights into it, so it does
# not match this architecture.
embedding_dim = 256  # size of each token embedding vector
hidden_dim = 1024  # number of units in each LSTM layer

inputs = layers.Input(shape=(sequence_length,), dtype="int", name="token_ids")
# Use the named sizes rather than repeating the literals, so changing them in
# one place changes the whole model.
x = layers.Embedding(vocabulary_size, embedding_dim)(inputs)
x = layers.Dropout(0.1)(x)

# return_sequences=True on both layers keeps one output per position.
x = layers.LSTM(hidden_dim, return_sequences=True, dropout=0.2, recurrent_dropout=0.1)(x)
x = layers.LSTM(hidden_dim, return_sequences=True, dropout=0.2, recurrent_dropout=0.1)(x)

x = layers.LayerNormalization()(x)
# Per-position probability distribution over the vocabulary.
outputs = layers.Dense(vocabulary_size, activation="softmax")(x)

model = keras.Model(inputs, outputs)

In [None]:
model.summary(line_length=80)

In [None]:
# Targets are integer token ids, hence the "sparse" loss and metric.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["sparse_categorical_accuracy"],
)
# Keep the History object: the cell no longer ends by echoing its repr, and
# the per-epoch metrics stay available for later inspection.
history = model.fit(training_data, epochs=20)

Epoch 1/20
[1m728/728[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m259s[0m 347ms/step - loss: 1.1902 - sparse_categorical_accuracy: 0.6648
Epoch 2/20
[1m728/728[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m249s[0m 342ms/step - loss: 0.7260 - sparse_categorical_accuracy: 0.7876
Epoch 3/20
[1m728/728[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m249s[0m 342ms/step - loss: 0.5571 - sparse_categorical_accuracy: 0.8346
Epoch 4/20
[1m728/728[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m249s[0m 341ms/step - loss: 0.5019 - sparse_categorical_accuracy: 0.8485
Epoch 5/20
[1m728/728[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m249s[0m 342ms/step - loss: 0.4796 - sparse_categorical_accuracy: 0.8544
Epoch 6/20
[1m261/728[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m2:39[0m 342ms/step - loss: 0.4586 - sparse_categorical_accuracy: 0.8590

KeyboardInterrupt: 