In [9]:
import numpy as np
import os
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense

# Parameters
batch_size = 64
epochs = 100
latent_dim = 256  # Hidden state dimension
num_samples = 10000
data_path = '/content/fra.txt'

# Initialize data structures
input_texts = []
target_texts = []
input_characters = set()
target_characters = set()

# Verify that the file exists and is being read correctly
if os.path.exists(data_path):
    with open(data_path, 'r', encoding='utf-8') as f:
        lines = f.read().split('\n')

    # Print the first few lines to check the content
    print("First few lines of the file:")
    for i, line in enumerate(lines[:10]):
        print(f"Line {i + 1}: {line}")

    # Process the data
    for line in lines[:min(num_samples, len(lines) - 1)]:
        line = line.strip()  # Remove leading/trailing whitespace
        parts = line.split('\t')

        # Ensure the line has both input and target sentences
        if len(parts) < 2:
            print(f"Skipping line (not properly formatted): {line}")
            continue

        input_text = parts[0]
        target_text = parts[1]

        # Add special start and end tokens for the target text
        target_text = '\t' + target_text + '\n'

        input_texts.append(input_text)
        target_texts.append(target_text)

        # Collect unique characters in input and target texts
        for char in input_text:
            if char not in input_characters:
                input_characters.add(char)
        for char in target_text:
            if char not in target_characters:
                target_characters.add(char)

    # Debugging print statements to check input_texts and target_texts
    print(f"Number of input texts: {len(input_texts)}")
    print(f"Number of target texts: {len(target_texts)}")

    # Check if input_texts or target_texts are empty
    if not input_texts or not target_texts:
        raise ValueError("input_texts or target_texts is empty")

    # Proceed to calculate sequence lengths
    max_encoder_seq_length = max([len(txt) for txt in input_texts])
    max_decoder_seq_length = max([len(txt) for txt in target_texts])

    print('Number of samples:', len(input_texts))
    print('Number of unique input tokens:', len(input_characters))
    print('Number of unique output tokens:', len(target_characters))
    print('Max sequence length for inputs:', max_encoder_seq_length)
    print('Max sequence length for outputs:', max_decoder_seq_length)

    # Create token index dictionaries
    input_token_index = dict([(char, i) for i, char in enumerate(sorted(input_characters))])
    target_token_index = dict([(char, i) for i, char in enumerate(sorted(target_characters))])

    # Initialize encoder and decoder data arrays
    encoder_input_data = np.zeros(
        (len(input_texts), max_encoder_seq_length, len(input_characters)),
        dtype='float32'
    )
    decoder_input_data = np.zeros(
        (len(input_texts), max_decoder_seq_length, len(target_characters)),
        dtype='float32'
    )
    decoder_target_data = np.zeros(
        (len(input_texts), max_decoder_seq_length, len(target_characters)),
        dtype='float32'
    )

    # Populate the data arrays
    for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
        for t, char in enumerate(input_text):
            encoder_input_data[i, t, input_token_index[char]] = 1.
        for t, char in enumerate(target_text):
            decoder_input_data[i, t, target_token_index[char]] = 1.
            if t > 0:
                # decoder_target_data is ahead of decoder_input_data by one timestep
                decoder_target_data[i, t - 1, target_token_index[char]] = 1.

    # Define the model
    # Encoder
    encoder_inputs = Input(shape=(None, len(input_characters)))
    encoder = LSTM(latent_dim, return_state=True)
    encoder_outputs, state_h, state_c = encoder(encoder_inputs)
    encoder_states = [state_h, state_c]

    # Decoder
    decoder_inputs = Input(shape=(None, len(target_characters)))
    decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
    decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
    decoder_dense = Dense(len(target_characters), activation='softmax')
    decoder_outputs = decoder_dense(decoder_outputs)

    # Define the full model
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

    # Compile and train the model
    model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
              batch_size=batch_size,
              epochs=epochs,
              validation_split=0.2)
else:
    print(f"File not found: {data_path}")


First few lines of the file:
Line 1: Go.    Va !
Line 2: Hi.    Salut !
Line 3: Run!    Cours !
Line 4: Wait!    Attends !
Line 5: Hello!    Bonjour !
Line 6: I see.    Je vois.
Line 7: Nice.    Sympa.
Line 8: Yes.    Oui.
Line 9: No.    Non.
Line 10: Thanks.    Merci.
Skipping line (not properly formatted): Go.    Va !
Skipping line (not properly formatted): Hi.    Salut !
Skipping line (not properly formatted): Run!    Cours !
Skipping line (not properly formatted): Wait!    Attends !
Skipping line (not properly formatted): Hello!    Bonjour !
Skipping line (not properly formatted): I see.    Je vois.
Skipping line (not properly formatted): Nice.    Sympa.
Skipping line (not properly formatted): Yes.    Oui.
Skipping line (not properly formatted): No.    Non.
Skipping line (not properly formatted): Thanks.    Merci.
Number of input texts: 0
Number of target texts: 0


ValueError: max() arg is an empty sequence

In [None]:
import numpy as np
import os
import logging
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense

# Initialize logging
logging.basicConfig(level=logging.INFO)

# Parameters
batch_size = 64
epochs = 100
latent_dim = 256  # Hidden state dimension
num_samples = 10000
data_path = '/content/fra.txt'

# Initialize data structures
input_texts = []
target_texts = []
input_characters = set()
target_characters = set()

# Function to load and preprocess data
def load_data(data_path, num_samples):
    if not os.path.exists(data_path):
        raise FileNotFoundError(f"File not found: {data_path}")

    with open(data_path, 'r', encoding='utf-8') as f:
        lines = f.read().split('\n')

    logging.info("First few lines of the file:")
    for i, line in enumerate(lines[:10]):
        logging.info(f"Line {i + 1}: {line}")

    input_texts = []
    target_texts = []
    input_characters = set()
    target_characters = set()

    for line in lines[:min(num_samples, len(lines) - 1)]:
        line = line.strip()  # Remove leading/trailing whitespace
        # Split line by any whitespace (space or tab)
        parts = line.split(maxsplit=1)
        if len(parts) < 2:
            logging.warning(f"Skipping line (not properly formatted): {line}")
            continue
        input_text = parts[0].strip()
        target_text = '\t' + parts[1].strip() + '\n'
        input_texts.append(input_text)
        target_texts.append(target_text)
        input_characters.update(set(input_text))
        target_characters.update(set(target_text))

    if not input_texts or not target_texts:
        raise ValueError("input_texts or target_texts is empty")

    return input_texts, target_texts, input_characters, target_characters

# Function to vectorize data
def vectorize_data(input_texts, target_texts, input_characters, target_characters):
    max_encoder_seq_length = max([len(txt) for txt in input_texts])
    max_decoder_seq_length = max([len(txt) for txt in target_texts])
    input_token_index = {char: i for i, char in enumerate(sorted(input_characters))}
    target_token_index = {char: i for i, char in enumerate(sorted(target_characters))}

    encoder_input_data = np.zeros((len(input_texts), max_encoder_seq_length, len(input_characters)), dtype='float32')
    decoder_input_data = np.zeros((len(input_texts), max_decoder_seq_length, len(target_characters)), dtype='float32')
    decoder_target_data = np.zeros((len(input_texts), max_decoder_seq_length, len(target_characters)), dtype='float32')

    for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
        for t, char in enumerate(input_text):
            encoder_input_data[i, t, input_token_index[char]] = 1.
        for t, char in enumerate(target_text):
            decoder_input_data[i, t, target_token_index[char]] = 1.
            if t > 0:
                decoder_target_data[i, t - 1, target_token_index[char]] = 1.

    return encoder_input_data, decoder_input_data, decoder_target_data, max_encoder_seq_length, max_decoder_seq_length, input_token_index, target_token_index

# Load and preprocess data
input_texts, target_texts, input_characters, target_characters = load_data(data_path, num_samples)

# Vectorize data
encoder_input_data, decoder_input_data, decoder_target_data, max_encoder_seq_length, max_decoder_seq_length, input_token_index, target_token_index = vectorize_data(input_texts, target_texts, input_characters, target_characters)

# Define the model
# Encoder
encoder_inputs = Input(shape=(None, len(input_characters)))
encoder = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
encoder_states = [state_h, state_c]

# Decoder
decoder_inputs = Input(shape=(None, len(target_characters)))
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
decoder_dense = Dense(len(target_characters), activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Define the full model
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

# Compile and train the model
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
          batch_size=batch_size,
          epochs=epochs,
          validation_split=0.2)
for seq_index in range(10):
    input_seq = encoder_input_data[seq_index: seq_index + 1]
    decoded_sentence = decode_sequence(input_seq)
    print('Input sentence:', input_texts[seq_index])
    print('Decoded sentence:', decoded_sentence)



Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - accuracy: 0.0139 - loss: 1.6853 - val_accuracy: 0.0278 - val_loss: 1.1509
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 178ms/step - accuracy: 0.0764 - loss: 1.6773 - val_accuracy: 0.0278 - val_loss: 1.1495
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 295ms/step - accuracy: 0.0833 - loss: 1.6704 - val_accuracy: 0.0000e+00 - val_loss: 1.1480
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 162ms/step - accuracy: 0.0972 - loss: 1.6635 - val_accuracy: 0.0000e+00 - val_loss: 1.1463
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 334ms/step - accuracy: 0.0972 - loss: 1.6561 - val_accuracy: 0.0000e+00 - val_loss: 1.1442
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 280ms/step - accuracy: 0.0972 - loss: 1.6475 - val_accuracy: 0.0000e+00 - val_loss: 1.1411
Epoch 7/100
[1m1/1[

InvalidArgumentError: Graph execution error:

Detected at node functional_8_1/lstm_8_1/while/lstm_cell_1/MatMul defined at (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code

  File "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py", line 37, in <module>

  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start

  File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start

  File "/usr/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/usr/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once

  File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 685, in <lambda>

  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 738, in _run_callback

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 825, in inner

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 786, in run

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 361, in process_one

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 261, in dispatch_shell

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 539, in execute_request

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py", line 302, in do_execute

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/zmqshell.py", line 539, in run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 2975, in run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code

  File "<ipython-input-12-36d066d341da>", line 111, in <cell line: 109>

  File "<ipython-input-11-b02a6d5bd210>", line 81, in decode_sequence

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 508, in predict

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 208, in one_step_on_data_distributed

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 198, in one_step_on_data

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 96, in predict_step

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/layers/layer.py", line 882, in __call__

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/ops/operation.py", line 46, in __call__

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 156, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/models/functional.py", line 175, in call

  File "/usr/local/lib/python3.10/dist-packages/keras/src/ops/function.py", line 171, in _run_through_graph

  File "/usr/local/lib/python3.10/dist-packages/keras/src/models/functional.py", line 556, in call

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/layers/layer.py", line 882, in __call__

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/ops/operation.py", line 46, in __call__

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 156, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/layers/rnn/lstm.py", line 570, in call

  File "/usr/local/lib/python3.10/dist-packages/keras/src/layers/rnn/rnn.py", line 406, in call

  File "/usr/local/lib/python3.10/dist-packages/keras/src/layers/rnn/lstm.py", line 565, in inner_loop

  File "/usr/local/lib/python3.10/dist-packages/keras/src/layers/rnn/rnn.py", line 346, in inner_loop

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/rnn.py", line 428, in rnn

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/rnn.py", line 411, in _step

  File "/usr/local/lib/python3.10/dist-packages/keras/src/layers/rnn/rnn.py", line 338, in step

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/layers/layer.py", line 882, in __call__

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/ops/operation.py", line 46, in __call__

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 156, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/layers/rnn/lstm.py", line 264, in call

  File "/usr/local/lib/python3.10/dist-packages/keras/src/ops/numpy.py", line 3445, in matmul

  File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/numpy.py", line 477, in matmul

Matrix size-incompatible: In[0]: [1,22], In[1]: [7,1024]
	 [[{{node functional_8_1/lstm_8_1/while/lstm_cell_1/MatMul}}]] [Op:__inference_one_step_on_data_distributed_50548]

In [11]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense

# Sample data
input_texts = ["Hello", "How are you?", "Good morning"]
target_texts = ["Bonjour", "Comment ça va?", "Bon matin"]

# Tokenize the data
input_tokenizer = tf.keras.preprocessing.text.Tokenizer()
target_tokenizer = tf.keras.preprocessing.text.Tokenizer()

input_tokenizer.fit_on_texts(input_texts)
target_tokenizer.fit_on_texts(target_texts)

input_sequences = input_tokenizer.texts_to_sequences(input_texts)
target_sequences = target_tokenizer.texts_to_sequences(target_texts)

max_encoder_seq_length = max([len(seq) for seq in input_sequences])
max_decoder_seq_length = max([len(seq) for seq in target_sequences])

num_encoder_tokens = len(input_tokenizer.word_index) + 1
num_decoder_tokens = len(target_tokenizer.word_index) + 1

# Padding sequences
input_sequences = tf.keras.preprocessing.sequence.pad_sequences(input_sequences, maxlen=max_encoder_seq_length, padding='post')
target_sequences = tf.keras.preprocessing.sequence.pad_sequences(target_sequences, maxlen=max_decoder_seq_length, padding='post')

# One-hot encoding
encoder_input_data = np.zeros((len(input_texts), max_encoder_seq_length, num_encoder_tokens), dtype='float32')
decoder_input_data = np.zeros((len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype='float32')
decoder_target_data = np.zeros((len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype='float32')

for i, (input_seq, target_seq) in enumerate(zip(input_sequences, target_sequences)):
    for t, word_index in enumerate(input_seq):
        encoder_input_data[i, t, word_index] = 1.
    for t, word_index in enumerate(target_seq):
        decoder_input_data[i, t, word_index] = 1.
        if t > 0:
            decoder_target_data[i, t - 1, word_index] = 1.

# Define the model
encoder_inputs = Input(shape=(None, num_encoder_tokens))
encoder_lstm = LSTM(256, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_inputs)
encoder_states = [state_h, state_c]

decoder_inputs = Input(shape=(None, num_decoder_tokens))
decoder_lstm = LSTM(256, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
decoder_dense = Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

# Compile the model
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.summary()

# Train the model
model.fit([encoder_input_data, decoder_input_data], decoder_target_data, batch_size=64, epochs=100, validation_split=0.2)

# Inference models for prediction
encoder_model = Model(encoder_inputs, encoder_states)

decoder_state_input_h = Input(shape=(256,))
decoder_state_input_c = Input(shape=(256,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

decoder_outputs, state_h, state_c = decoder_lstm(
    decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model(
    [decoder_inputs] + decoder_states_inputs,
    [decoder_outputs] + decoder_states)

# Function to decode sequences
def decode_sequence(input_seq):
    states_value = encoder_model.predict(input_seq)
    target_seq = np.zeros((1, 1, num_decoder_tokens))
    # The token 'startseq' doesn't exist, start with the first token instead
    sampled_token_index = 0
    target_seq[0, 0, sampled_token_index] = 1.

    stop_condition = False
    decoded_sentence = ''
    while not stop_condition:
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value)
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_word = target_tokenizer.index_word[sampled_token_index]
        decoded_sentence += ' ' + sampled_word

        if (sampled_word == 'endseq' or len(decoded_sentence) > max_decoder_seq_length):
            stop_condition = True

        target_seq = np.zeros((1, 1, num_decoder_tokens))
        target_seq[0, 0, sampled_token_index] = 1.
        states_value = [h, c]

    return decoded_sentence

# Test the model
for seq_index in range(len(input_texts)):
    input_seq = encoder_input_data[seq_index: seq_index + 1]
    decoded_sentence = decode_sequence(input_seq)
    print('-')
    print('Input sentence:', input_texts[seq_index])
    print('Decoded sentence:', decoded_sentence)


Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - loss: 1.2998 - val_loss: 1.2943
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 171ms/step - loss: 1.2475 - val_loss: 1.2854
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 170ms/step - loss: 1.2068 - val_loss: 1.2768
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 266ms/step - loss: 1.1692 - val_loss: 1.2680
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140ms/step - loss: 1.1319 - val_loss: 1.2583
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140ms/step - loss: 1.0938 - val_loss: 1.2477
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 146ms/step - loss: 1.0539 - val_loss: 1.2361
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 155ms/step - loss: 1.0118 - val_loss: 1.2234
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 184ms/step


KeyError: 0

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense

# Sample data
input_texts = ["Hello", "How are you?", "Good morning"]
target_texts = ["Bonjour", "Comment ça va?", "Bon matin"]

# Tokenize the data
input_tokenizer = tf.keras.preprocessing.text.Tokenizer()
target_tokenizer = tf.keras.preprocessing.text.Tokenizer()

input_tokenizer.fit_on_texts(input_texts)
target_tokenizer.fit_on_texts(target_texts)

input_sequences = input_tokenizer.texts_to_sequences(input_texts)
target_sequences = target_tokenizer.texts_to_sequences(target_texts)

max_encoder_seq_length = max([len(seq) for seq in input_sequences])
max_decoder_seq_length = max([len(seq) for seq in target_sequences])

num_encoder_tokens = len(input_tokenizer.word_index) + 1
num_decoder_tokens = len(target_tokenizer.word_index) + 1

# Padding sequences
input_sequences = tf.keras.preprocessing.sequence.pad_sequences(input_sequences, maxlen=max_encoder_seq_length, padding='post')
target_sequences = tf.keras.preprocessing.sequence.pad_sequences(target_sequences, maxlen=max_decoder_seq_length, padding='post')

# One-hot encoding
encoder_input_data = np.zeros((len(input_texts), max_encoder_seq_length, num_encoder_tokens), dtype='float32')
decoder_input_data = np.zeros((len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype='float32')
decoder_target_data = np.zeros((len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype='float32')

for i, (input_seq, target_seq) in enumerate(zip(input_sequences, target_sequences)):
    for t, word_index in enumerate(input_seq):
        encoder_input_data[i, t, word_index] = 1.
    for t, word_index in enumerate(target_seq):
        decoder_input_data[i, t, word_index] = 1.
        if t > 0:
            decoder_target_data[i, t - 1, word_index] = 1.

# Define the model
encoder_inputs = Input(shape=(None, num_encoder_tokens))
encoder_lstm = LSTM(256, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_inputs)
encoder_states = [state_h, state_c]

decoder_inputs = Input(shape=(None, num_decoder_tokens))
decoder_lstm = LSTM(256, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
decoder_dense = Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

# Compile the model
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.summary()

# Train the model
model.fit([encoder_input_data, decoder_input_data], decoder_target_data, batch_size=64, epochs=100, validation_split=0.2)

# Inference models for prediction
encoder_model = Model(encoder_inputs, encoder_states)

decoder_state_input_h = Input

# Test the model
for seq_index in range(len(input_texts)):
    input_seq = encoder_input_data[seq_index: seq_index + 1]
    decoded_sentence = decode_sequence(input_seq)
    print('-')
    print('Input sentence:', input_texts[seq_index])
    print('Decoded sentence:', decoded_sentence)