<a href="https://colab.research.google.com/github/NehaParveen03/my_first_report/blob/neha1/Seq2Seq_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical

# Toy corpus and its character vocabulary; sorting keeps the index mapping
# stable from run to run.
text = "the quick brown fox jumps over the lazy dog. the dog barked at the fox."
chars = sorted(set(text))
char_indices = {symbol: index for index, symbol in enumerate(chars)}
indices_char = dict(enumerate(chars))

maxlen = 10  # number of characters in each input window
step = 1     # stride between the starts of consecutive windows

# Slide a window over the corpus: each window of `maxlen` characters is paired
# with the single character that immediately follows it.
sentences = [text[start:start + maxlen]
             for start in range(0, len(text) - maxlen, step)]
next_chars = [text[start + maxlen]
              for start in range(0, len(text) - maxlen, step)]

# One-hot encode the windows (x) and their follow-up characters (y).
# The builtin `bool` is used because the `np.bool` alias no longer exists.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for row, (window, following) in enumerate(zip(sentences, next_chars)):
    for position, symbol in enumerate(window):
        x[row, position, char_indices[symbol]] = True
    y[row, char_indices[following]] = True

# Model Architecture
latent_dim = 128  # size of the LSTM hidden and cell state vectors

# Encoder: reads a (maxlen, len(chars)) one-hot window. Only the final hidden
# and cell states are kept; they become the decoder's initial state.
encoder_inputs = Input(shape=(maxlen, len(chars)))
encoder = LSTM(latent_dim, return_state=True)
encoder_outputs, *encoder_states = encoder(encoder_inputs)
state_h, state_c = encoder_states

# Decoder: accepts a one-hot sequence of any length (time dimension is None),
# starts from the encoder states and emits a softmax over the vocabulary at
# every timestep.
decoder_inputs = Input(shape=(None, len(chars)))
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs = decoder_lstm(decoder_inputs, initial_state=encoder_states)[0]
decoder_dense = Dense(len(chars), activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Training model: (encoder window, decoder input) -> per-step distribution.
model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_outputs)

# Targets are one-hot vectors, hence categorical cross-entropy.
model.compile(loss='categorical_crossentropy', optimizer='adam')

# Training
epochs = 50
batch_size = 64

# Prepare single-timestep decoder data (teacher forcing).
# The decoder input is the character that precedes the target, i.e. the last
# character of each window, and the target is the character that follows the
# window. Feeding the target itself as the decoder input would let the
# network learn a trivial copy, and would not match generate_text(), which
# seeds the decoder with the last character of the prompt.
decoder_input_data = np.zeros((len(sentences), 1, len(chars)), dtype='float32')
decoder_target_data = np.zeros((len(sentences), 1, len(chars)), dtype='float32')

for i, (sentence, next_char) in enumerate(zip(sentences, next_chars)):
    decoder_input_data[i, 0, char_indices[sentence[-1]]] = 1.
    decoder_target_data[i, 0, char_indices[next_char]] = 1.

# validation_split=0.2 holds out the last 20% of the samples for validation.
model.fit([x, decoder_input_data], decoder_target_data,
          batch_size=batch_size,
          epochs=epochs,
          validation_split=0.2)

# Inference Setup
# Encoder-only model: maps an input window to its [hidden, cell] state pair.
encoder_model = Model(inputs=encoder_inputs, outputs=encoder_states)

# Inputs through which the caller passes the decoder's previous states
# (hidden first, then cell).
decoder_states_inputs = [Input(shape=(latent_dim,)) for _ in range(2)]
decoder_state_input_h, decoder_state_input_c = decoder_states_inputs

# Re-apply the same decoder layers, this time starting from externally
# supplied states and exposing the updated states so that decoding can
# proceed one step at a time.
decoder_outputs, *decoder_states = decoder_lstm(
    decoder_inputs, initial_state=decoder_states_inputs)
state_h, state_c = decoder_states
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model(
    inputs=[decoder_inputs, *decoder_states_inputs],
    outputs=[decoder_outputs, *decoder_states],
)

def generate_text(input_text, length=100):
    """Generate text by greedily decoding characters after a seed string.

    The seed is one-hot encoded and passed through ``encoder_model`` to obtain
    the initial LSTM states. ``decoder_model`` is then run one character at a
    time, and each arg-max prediction is fed back in as the next input.

    Args:
        input_text: Non-empty seed string. Only its last ``maxlen`` characters
            are encoded, because the encoder input holds ``maxlen`` timesteps;
            a shorter seed leaves the remaining timesteps all-zero.
        length: Number of characters to generate.

    Returns:
        ``input_text`` followed by ``length`` generated characters.

    Raises:
        ValueError: If ``input_text`` is empty.
        KeyError: If the encoded part of the seed contains a character that
            is missing from ``char_indices``.
    """
    if not input_text:
        raise ValueError("input_text must contain at least one character")

    # Keep only what fits in the encoder window; indexing past `maxlen`
    # timesteps would otherwise raise IndexError.
    seed = input_text[-maxlen:]

    # Builtin `bool`: the `np.bool` alias was removed from NumPy and raises
    # AttributeError on current releases.
    input_seq = np.zeros((1, maxlen, len(chars)), dtype=bool)
    for t, char in enumerate(seed):
        input_seq[0, t, char_indices[char]] = True

    # A list is required so it can be concatenated with [target_seq] below.
    states_value = list(encoder_model.predict(input_seq))

    # The decoder starts from the last character of the seed.
    target_seq = np.zeros((1, 1, len(chars)))
    target_seq[0, 0, char_indices[seed[-1]]] = 1.

    generated = [input_text]

    for _ in range(length):
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value)
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        generated.append(indices_char[sampled_token_index])

        # Feed the chosen character and the updated states into the next step.
        target_seq = np.zeros((1, 1, len(chars)))
        target_seq[0, 0, sampled_token_index] = 1.
        states_value = [h, c]

    return "".join(generated)

# Example usage: extend a ten-character seed by 50 generated characters and
# print the seed together with its continuation.
start_text = "the quick "
generated_text = generate_text(input_text=start_text, length=50)
print(generated_text)

Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - loss: 3.3292 - val_loss: 3.3298
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step - loss: 3.3138 - val_loss: 3.3170
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 138ms/step - loss: 3.2981 - val_loss: 3.3040
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step - loss: 3.2820 - val_loss: 3.2904
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step - loss: 3.2651 - val_loss: 3.2760
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 147ms/step - loss: 3.2471 - val_loss: 3.2603
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - loss: 3.2276 - val_loss: 3.2429
Epoch 8/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 143ms/step - loss: 3.2061 - val_loss: 3.2231
Epoch 9/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

  input_seq = np.zeros((1, maxlen, len(chars)), dtype=np.bool)


AttributeError: module 'numpy' has no attribute 'bool'.
`np.bool` was a deprecated alias for the builtin `bool`. To avoid this error in existing code, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
    https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations