In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Dense, Embedding, Input, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from nltk.translate.bleu_score import sentence_bleu
import random

In [2]:
# Load Dataset
def load_data():
    # Replace with actual dataset loading logic
    input_texts = ["Hi", "How are you?", "What is your name?", "Goodbye"]
    target_texts = ["Hello", "I'm fine, thank you!", "I am a chatbot", "See you later"]
    return input_texts, target_texts

In [3]:
# Preprocessing
def preprocess_data(input_texts, target_texts, vocab_size, max_sequence_length):
    tokenizer = Tokenizer(num_words=vocab_size, oov_token="<OOV>")
    tokenizer.fit_on_texts(input_texts + target_texts)
    
    input_sequences = tokenizer.texts_to_sequences(input_texts)
    target_sequences = tokenizer.texts_to_sequences(target_texts)
    
    input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_length, padding='post')
    target_sequences = pad_sequences(target_sequences, maxlen=max_sequence_length, padding='post')

    return input_sequences, target_sequences, tokenizer

In [4]:
# Define Basic LSTM Model
def build_basic_seq2seq_model(vocab_size, embedding_dim, max_sequence_length):
    model = Sequential([
        Embedding(vocab_size, embedding_dim, input_length=max_sequence_length),
        LSTM(128, return_sequences=True),
        Dropout(0.5),
        LSTM(128),
        Dropout(0.5),
        Dense(vocab_size, activation='softmax')
    ])
    model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [5]:
# Define Attention-based Seq2Seq Model
def build_attention_seq2seq_model(vocab_size, embedding_dim, max_sequence_length):
    # Encoder
    encoder_inputs = Input(shape=(max_sequence_length,))
    encoder_embedding = Embedding(vocab_size, embedding_dim)(encoder_inputs)
    encoder_lstm, state_h, state_c = LSTM(128, return_state=True)(encoder_embedding)

    # Decoder
    decoder_inputs = Input(shape=(max_sequence_length,))
    decoder_embedding = Embedding(vocab_size, embedding_dim)(decoder_inputs)
    decoder_lstm = LSTM(128, return_sequences=True, return_state=False)(decoder_embedding, initial_state=[state_h, state_c])
    attention = Dense(1, activation='tanh')(decoder_lstm)
    decoder_outputs = Dense(vocab_size, activation='softmax')(attention)

    # Model
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
    model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [6]:
# Train Model
def train_model(model, X_train, Y_train, epochs, batch_size):
    history = model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size, validation_split=0.2)
    return history

In [7]:
# Evaluate Model
def evaluate_model(model, tokenizer, test_input, test_target, max_sequence_length):
    bleu_scores = []
    for input_text, target_text in zip(test_input, test_target):
        input_seq = tokenizer.texts_to_sequences([input_text])
        input_seq = pad_sequences(input_seq, maxlen=max_sequence_length)
        
        prediction = model.predict(input_seq)
        predicted_text = tokenizer.sequences_to_texts(np.argmax(prediction, axis=-1))

        bleu_scores.append(sentence_bleu([target_text.split()], predicted_text.split()))

    return np.mean(bleu_scores)

In [8]:
# Visualization
def plot_training(history1, history2):
    plt.figure(figsize=(12, 6))

    # Basic Model
    plt.plot(history1.history['loss'], label='Basic Model - Loss')
    plt.plot(history1.history['val_loss'], label='Basic Model - Val Loss')

    # Attention Model
    plt.plot(history2.history['loss'], label='Attention Model - Loss')
    plt.plot(history2.history['val_loss'], label='Attention Model - Val Loss')

    plt.title('Training and Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

In [9]:
# Main Workflow
input_texts, target_texts = load_data()
vocab_size = 5000
embedding_dim = 128
max_sequence_length = 10

X_data, Y_data, tokenizer = preprocess_data(input_texts, target_texts, vocab_size, max_sequence_length)
X_train, X_test, Y_train, Y_test = train_test_split(X_data, Y_data, test_size=0.2, random_state=42)

In [11]:
# Basic Model
basic_model = build_basic_seq2seq_model(vocab_size, embedding_dim, max_sequence_length)
print("Training Basic Model...")
history_basic = train_model(basic_model, X_train, Y_train, epochs=10, batch_size=32)

Training Basic Model...
Epoch 1/10


InvalidArgumentError: Graph execution error:

Detected at node 'sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits' defined at (most recent call last):
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\ipykernel_launcher.py", line 18, in <module>
      app.launch_new_instance()
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
      app.start()
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\ipykernel\kernelapp.py", line 739, in start
      self.io_loop.start()
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\tornado\platform\asyncio.py", line 205, in start
      self.asyncio_loop.run_forever()
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\asyncio\base_events.py", line 601, in run_forever
      self._run_once()
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\asyncio\base_events.py", line 1905, in _run_once
      handle._run()
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\ipykernel\kernelbase.py", line 545, in dispatch_queue
      await self.process_one()
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\ipykernel\kernelbase.py", line 534, in process_one
      await dispatch(*args)
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\ipykernel\kernelbase.py", line 437, in dispatch_shell
      await result
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\ipykernel\ipkernel.py", line 362, in execute_request
      await super().execute_request(stream, ident, parent)
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\ipykernel\kernelbase.py", line 778, in execute_request
      reply_content = await reply_content
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\ipykernel\ipkernel.py", line 449, in do_execute
      res = shell.run_cell(
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\ipykernel\zmqshell.py", line 549, in run_cell
      return super().run_cell(*args, **kwargs)
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\IPython\core\interactiveshell.py", line 3024, in run_cell
      result = self._run_cell(
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\IPython\core\interactiveshell.py", line 3079, in _run_cell
      result = runner(coro)
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\IPython\core\interactiveshell.py", line 3284, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\IPython\core\interactiveshell.py", line 3466, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\IPython\core\interactiveshell.py", line 3526, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\Metaverse_KPMG\AppData\Local\Temp\ipykernel_24160\4019647157.py", line 4, in <module>
      history_basic = train_model(basic_model, X_train, Y_train, epochs=10, batch_size=32)
    File "C:\Users\Metaverse_KPMG\AppData\Local\Temp\ipykernel_24160\461917224.py", line 3, in train_model
      history = model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size, validation_split=0.2)
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\keras\engine\training.py", line 1564, in fit
      tmp_logs = self.train_function(iterator)
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\keras\engine\training.py", line 1160, in train_function
      return step_function(self, iterator)
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\keras\engine\training.py", line 1146, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\keras\engine\training.py", line 1135, in run_step
      outputs = model.train_step(data)
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\keras\engine\training.py", line 994, in train_step
      loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\keras\engine\training.py", line 1052, in compute_loss
      return self.compiled_loss(
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\keras\engine\compile_utils.py", line 265, in __call__
      loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\keras\losses.py", line 152, in __call__
      losses = call_fn(y_true, y_pred)
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\keras\losses.py", line 272, in call
      return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\keras\losses.py", line 2084, in sparse_categorical_crossentropy
      return backend.sparse_categorical_crossentropy(
    File "C:\Users\Metaverse_KPMG\anaconda3\envs\chatbot-env\lib\site-packages\keras\backend.py", line 5630, in sparse_categorical_crossentropy
      res = tf.nn.sparse_softmax_cross_entropy_with_logits(
Node: 'sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits'
logits and labels must have the same first dimension, got logits shape [2,5000] and labels shape [20]
	 [[{{node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_train_function_11171]

In [12]:
# Attention Model
attention_model = build_attention_seq2seq_model(vocab_size, embedding_dim, max_sequence_length)
print("Training Attention Model...")
history_attention = train_model(attention_model, [X_train, X_train], Y_train, epochs=10, batch_size=32)

Training Attention Model...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Plot Comparison
plot_training(history_basic, history_attention)

# Evaluate
print("Evaluating Basic Model...")
basic_bleu = evaluate_model(basic_model, tokenizer, X_test, Y_test, max_sequence_length)

print("Evaluating Attention Model...")
attention_bleu = evaluate_model(attention_model, tokenizer, X_test, Y_test, max_sequence_length)

print(f"Basic Model BLEU Score: {basic_bleu}")
print(f"Attention Model BLEU Score: {attention_bleu}")