# TRANSFORMER SIMPLE MODEL

## GPU info

In [1]:
# gpu_info = !nvidia-smi
# gpu_info = '\n'.join(gpu_info)
# if gpu_info.find('failed') >= 0:
#   print('Not connected to a GPU')
# else:
#   print(gpu_info)

In [2]:
# from google.colab import drive
# drive.mount('/content/drive')

In [3]:
# Check if GPU is available
from tensorflow.python.client import device_lib

def get_gpu_details():
    devices = device_lib.list_local_devices()
    for device in devices:
        if device.device_type == 'GPU':
            print(f"Device Name: {device.name}")
            print(f"Memory Limit: {device.memory_limit} bytes")
            print(f"Description: {device.physical_device_desc}")

get_gpu_details()


Device Name: /device:GPU:0
Memory Limit: 4158652416 bytes
Description: device: 0, name: NVIDIA GeForce GTX 1660 Ti with Max-Q Design, pci bus id: 0000:01:00.0, compute capability: 7.5


### Config

In [4]:
max_length = 15 # Length of input and target sequences, padding

### Import libraries

In [5]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from typing import List, Tuple, Dict

import re
import string
import unicodedata
import nltk
import spacy
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.regularizers import l2

import numpy as np
import pandas as pd
from tensorflow.keras.models import Model, load_model
# from tensorflow.keras.layers import Input, Embedding, MultiHeadAttention, Dense, LayerNormalization, Dropout, GlobalAveragePooling1D, Layer, Masking
# from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Masking, MultiHeadAttention
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers.schedules import ExponentialDecay
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models

import pickle


In [6]:
print("TensorFlow version:", tf.__version__)
print("TensorFlow Keras version:", tf.keras.__version__)

TensorFlow version: 2.10.0
TensorFlow Keras version: 2.10.0


In [7]:
# Download necessary NLTK resources
nltk.download('punkt')  # Tokenizer
nltk.download('wordnet')  # Lemmatizer
nltk.download('stopwords')  # Stopwords
nltk.download('omw-1.4') # Ensures multilingual contexts

# Stopwords list
stop_words = set(stopwords.words('english'))

# Initialize the lemmatizer
lemmatizer = WordNetLemmatizer()

initial_preprocessing = True

# Load spaCy's English NLP model
nlp = spacy.load('en_core_web_sm')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\tomui\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\tomui\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\tomui\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\tomui\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


## Create prepocessing functions for initial text and later response generation preprocessing

In [8]:
contractions = {
    "’": "'",
    "‘": "'",
    "“": '"',
    "”": '"',
    "can't": "cannot",
    "won't": "will not",
    "n't": " not",
    "i'm": "i am",
    "i'd": "i would",
    "thats's": "that is",
    "it's": "it is",
    "he's": "he is",
    "she's": "she is",
    "you're": "you are",
    "they're": "they are",
    "we're": "we are",
    "i've": "i have",
    "you've": "you have",
    "they've": "they have",
    "we've": "we have",
    "isn't": "is not",
    "aren't": "are not",
    "wasn't": "was not",
    "weren't": "were not",
    "doesn't": "does not",
    "don't": "do not",
    "didn't": "did not",
    "hasn't": "has not",
    "haven't": "have not",
    "hadn't": "had not",
    "i'll": "i will",
    "you'll": "you will",
    "he'll": "he will",
    "she'll": "she will",
    "we'll": "we will",
    "they'll": "they will",
    "wouldn't": "would not",
    "shouldn't": "should not",
    "couldn't": "could not",
    "mightn't": "might not",
    "mustn't": "must not",
    "she'd": "she would",
    "he'd": "he would",
    "they'd": "they would",
    "we'd": "we would",
    "that'll": "that will",
    "there'll": "there will",
    "who'll": "who will",
    "it'll": "it will",
    "that'd": "that would",
    "there'd": "there would",
    "who'd": "who would",
    "when's": "when is",
    "where's": "where is",
    "why's": "why is",
    "how's": "how is",
    "y'all": "you all",
    "let's": "let us",
    "ma'am": "madam",
    "o'clock": "of the clock",
    "ain't": "is not",
    "could've": "could have",
    "should've": "should have",
    "would've": "would have",
    "might've": "might have",
    "must've": "must have",
    "who've": "who have",
    "oughtn't": "ought not",
    "daren't": "dare not",
    "needn't": "need not",
    "what's": "what is",
    "usedn't": "used not"
}

def normalize_text(text: str) -> str:
    # Normalize Unicode string to NFKD form, remove non-ASCII characters, and then decode it back to a UTF-8 string
    text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('utf-8')
    # Convert to lowercase
    text = text.lower()
    # Remove spaces around apostrophes
    text = re.sub(r"\s*'\s*", "'", text)
    # Add a space before and after any punctuation mark (., !, or ?)
    text = re.sub(r"\s*([.!?])\s*", r" \1 ", text)
    # Correct contractions
    for contraction, replacement in contractions.items():
        text = re.sub(re.escape(contraction), replacement, text)
    # Replace any sequence of characters that are not letters, basic punctuation
    text = re.sub(r"[^a-z' ]", ' ', text) # re.sub(r"[^a-z.,'!? ]", ' ', text)
    # Replace any sequence of whitespace characters with a single space and remove leading and trailing whitespace
    text = re.sub(r"\s+", ' ', text).strip()
    return text

def remove_names(text: str) -> str:
    # Use spaCy to detect and remove names from the text
    doc = nlp(text)
    filtered_text = ' '.join([token.text for token in doc if token.ent_type_ != 'PERSON']) # Takes really long time, exlude from chatbot input preprocessing
    return filtered_text

def preprocess_text(text: str) -> str:
    # Normalize text
    text = normalize_text(text)
    # Remove names using spaCy's NER
    if initial_preprocessing:
        text = remove_names(text)
    # # Remove punctuation
    # text = text.translate(str.maketrans('', '', string.punctuation))
    # Remove stopwords and tokenize
    # words = word_tokenize(text) # More intelligent splitting
    # filtered_words = [word for word in words if word not in stop_words]
    # # Lemmatize words
    # lemmatized_words = [lemmatizer.lemmatize(word) for word in filtered_words]
    # Add <SOS> and <EOS> tokens, and join the list into a single string
    # return ' '.join(['sofs'] + lemmatized_words + ['eofs'])
        # Trim the text to the desired length
    words = text.split()[:max_length]
    trimmed_text = ' '.join(words)  # Consider to remove trimming, if you want pad later on max length
    return trimmed_text

### Load the Tokenizer

In [9]:
# Load the tokenizer from file
data_dir = os.path.join(os.getcwd(), 'data')
tokenizer_path = os.path.join(data_dir, 'tokenizer_dd_tf210.pickle')
with open(tokenizer_path, 'rb') as handle:
    tokenizer = pickle.load(handle)

### Loading the DataFrame

In [10]:
# Loading the DataFrame
data_dir = os.path.join(os.getcwd(), 'data')
file_path_parquet = os.path.join(data_dir, 'training_df_dd_tf210.parquet')
training_data_final = pd.read_parquet(file_path_parquet)

training_data_final.head(10)

Unnamed: 0,input,response,encoder_input_data,decoder_input_data,decoder_output_data
0,say how about going for a few beers after dinner,you know that is tempting but is really not go...,"[0, 0, 0, 0, 0, 138, 33, 37, 75, 20, 8, 206, 3...","[0, 0, 1, 5, 46, 11, 9, 3717, 29, 9, 60, 15, 4...","[0, 0, 5, 46, 11, 9, 3717, 29, 9, 60, 15, 47, ..."
1,you know that is tempting but is really not go...,what do you mean it will help us to relax,"[0, 0, 5, 46, 11, 9, 3717, 29, 9, 60, 15, 47, ...","[0, 0, 0, 0, 0, 1, 18, 13, 5, 161, 10, 23, 101...","[0, 0, 0, 0, 0, 18, 13, 5, 161, 10, 23, 101, 9..."
2,what do you mean it will help us to relax,do you really think so i do not it will just m...,"[0, 0, 0, 0, 0, 18, 13, 5, 161, 10, 23, 101, 9...","[1, 13, 5, 60, 43, 36, 4, 13, 15, 10, 23, 48, ...","[13, 5, 60, 43, 36, 4, 13, 15, 10, 23, 48, 102..."
3,do you really think so i do not it will just m...,i guess you are right but what shall we do i d...,"[13, 5, 60, 43, 36, 4, 13, 15, 10, 23, 48, 102...","[1, 4, 226, 5, 17, 53, 29, 18, 325, 22, 13, 4,...","[4, 226, 5, 17, 53, 29, 18, 325, 22, 13, 4, 13..."
4,i guess you are right but what shall we do i d...,i suggest a walk over to the gym where we can ...,"[4, 226, 5, 17, 53, 29, 18, 325, 22, 13, 4, 13...","[1, 4, 593, 8, 423, 140, 7, 6, 973, 105, 22, 2...","[4, 593, 8, 423, 140, 7, 6, 973, 105, 22, 21, ..."
5,i suggest a walk over to the gym where we can ...,that 's a good idea i hear mary and sally ofte...,"[4, 593, 8, 423, 140, 7, 6, 973, 105, 22, 21, ...","[1, 11, 38, 8, 47, 179, 4, 237, 441, 14, 3323,...","[11, 38, 8, 47, 179, 4, 237, 441, 14, 3323, 30..."
6,that 's a good idea i hear mary and sally ofte...,sounds great to me if they are willing we coul...,"[11, 38, 8, 47, 179, 4, 237, 441, 14, 3323, 30...","[1, 154, 99, 7, 26, 57, 54, 17, 1083, 22, 79, ...","[154, 99, 7, 26, 57, 54, 17, 1083, 22, 79, 200..."
7,sounds great to me if they are willing we coul...,good let us go now,"[154, 99, 7, 26, 57, 54, 17, 1083, 22, 79, 200...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 47, 74, 93, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 47, 74, 93, 59,..."
8,good let us go now,all right,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 47, 74, 93, 59,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 50,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 53..."
9,can you do push ups,of course i can it is a piece of cake believe ...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 5, 13, 1635...","[1, 16, 125, 4, 21, 10, 9, 8, 773, 16, 899, 25...","[16, 125, 4, 21, 10, 9, 8, 773, 16, 899, 254, ..."


In [11]:
print(len(tokenizer.word_index))
print(tokenizer.num_words)

15384
10000


### Set the Data

In [12]:
encoder_input_data = np.array(training_data_final['encoder_input_data'].tolist())
decoder_input_data = np.array(training_data_final['decoder_input_data'].tolist())
decoder_output_data = np.array(training_data_final['decoder_output_data'].tolist())
print(type(encoder_input_data))
print(encoder_input_data.shape)
print(decoder_input_data.shape)
print(decoder_output_data.shape)
encoder_input_data.dtype

<class 'numpy.ndarray'>
(89861, 15)
(89861, 16)
(89861, 16)


dtype('int64')

In [13]:
encoder_input_data = encoder_input_data.astype('int32')
decoder_input_data = decoder_input_data.astype('int32')
decoder_output_data = decoder_output_data.astype('int32')
encoder_input_data.dtype

dtype('int32')

### Define the Model

In [14]:
class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = models.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim)]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training, mask=None):
        attn_output = self.att(inputs, inputs, attention_mask=mask)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super(TokenAndPositionEmbedding, self).__init__()
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim, mask_zero=True)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

    def compute_mask(self, inputs, mask=None):
        return self.token_emb.compute_mask(inputs, mask=mask)


In [15]:
def create_model(vocab_size, max_encoder_seq_length, max_decoder_seq_length, embedding_dim, num_heads, ff_dim):
    # Encoder
    encoder_inputs = layers.Input(shape=(max_encoder_seq_length,))
    encoder_embedding = TokenAndPositionEmbedding(max_encoder_seq_length, vocab_size, embedding_dim)
    encoder_mask = encoder_embedding.compute_mask(encoder_inputs)
    encoder_transformer_block = TransformerBlock(embedding_dim, num_heads, ff_dim)
    encoder_output = encoder_transformer_block(encoder_embedding(encoder_inputs), mask=encoder_mask)

    # Decoder
    decoder_inputs = layers.Input(shape=(max_decoder_seq_length,))
    decoder_embedding = TokenAndPositionEmbedding(max_decoder_seq_length, vocab_size, embedding_dim)
    decoder_mask = decoder_embedding.compute_mask(decoder_inputs)
    decoder_transformer_block = TransformerBlock(embedding_dim, num_heads, ff_dim)
    decoder_output = decoder_transformer_block(decoder_embedding(decoder_inputs), mask=decoder_mask)

    # Output
    decoder_dense = layers.Dense(vocab_size, activation="softmax")
    decoder_output = decoder_dense(decoder_output)

    # Model
    transformer = models.Model([encoder_inputs, decoder_inputs], decoder_output)

    return transformer

# Model parameters
vocab_size = len(tokenizer.word_index) + 1
embedding_dim = 256
num_heads = 8
ff_dim = 512
max_encoder_seq_length = 15
max_decoder_seq_length = 16

# Create and compile the model
model = create_model(vocab_size, max_encoder_seq_length, max_decoder_seq_length, embedding_dim, num_heads, ff_dim)
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 16)]         0           []                               
                                                                                                  
 token_and_position_embedding_1  (None, 16, 256)     3942656     ['input_2[0][0]']                
  (TokenAndPositionEmbedding)                                                                     
                                                                                                  
 tf.math.not_equal_1 (TFOpLambd  (None, 16)          0           ['input_2[0][0]']                
 a)                                                                                               
                                                                                              

### Train the Model

In [16]:
# Train the model
history = model.fit(
    [encoder_input_data, decoder_input_data],
    np.expand_dims(decoder_output_data, -1),  # Ensure proper shape for loss function
    batch_size=64,
    epochs=1,  # Increase epochs for actual training
    validation_split=0.2
)

InvalidArgumentError: Graph execution error:

Detected at node 'model/transformer_block_1/multi_head_attention_1/and_1' defined at (most recent call last):
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\traitlets\config\application.py", line 992, in launch_instance
      app.start()
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\ipykernel\kernelapp.py", line 701, in start
      self.io_loop.start()
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\tornado\platform\asyncio.py", line 195, in start
      self.asyncio_loop.run_forever()
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\asyncio\windows_events.py", line 321, in run_forever
      super().run_forever()
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\asyncio\base_events.py", line 603, in run_forever
      self._run_once()
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\asyncio\base_events.py", line 1909, in _run_once
      handle._run()
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\ipykernel\kernelbase.py", line 534, in dispatch_queue
      await self.process_one()
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\ipykernel\kernelbase.py", line 523, in process_one
      await dispatch(*args)
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\ipykernel\kernelbase.py", line 429, in dispatch_shell
      await result
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\ipykernel\kernelbase.py", line 767, in execute_request
      reply_content = await reply_content
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\ipykernel\ipkernel.py", line 429, in do_execute
      res = shell.run_cell(
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\ipykernel\zmqshell.py", line 549, in run_cell
      return super().run_cell(*args, **kwargs)
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\IPython\core\interactiveshell.py", line 3051, in run_cell
      result = self._run_cell(
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\IPython\core\interactiveshell.py", line 3106, in _run_cell
      result = runner(coro)
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\IPython\core\interactiveshell.py", line 3311, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\IPython\core\interactiveshell.py", line 3493, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\IPython\core\interactiveshell.py", line 3553, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\tomui\AppData\Local\Temp\ipykernel_21136\908720210.py", line 2, in <module>
      history = model.fit(
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\keras\engine\training.py", line 1564, in fit
      tmp_logs = self.train_function(iterator)
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\keras\engine\training.py", line 1160, in train_function
      return step_function(self, iterator)
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\keras\engine\training.py", line 1146, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\keras\engine\training.py", line 1135, in run_step
      outputs = model.train_step(data)
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\keras\engine\training.py", line 993, in train_step
      y_pred = self(x, training=True)
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\keras\engine\training.py", line 557, in __call__
      return super().__call__(*args, **kwargs)
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\keras\engine\functional.py", line 510, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\keras\engine\functional.py", line 667, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\tomui\AppData\Local\Temp\ipykernel_21136\3638583998.py", line 14, in call
      attn_output = self.att(inputs, inputs, attention_mask=mask)
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\keras\layers\attention\multi_head_attention.py", line 553, in call
      attention_mask = self._compute_attention_mask(
    File "C:\Users\tomui\anaconda3\envs\tensorflow_cuda_gpu_py310\lib\site-packages\keras\layers\attention\multi_head_attention.py", line 670, in _compute_attention_mask
      else tf.cast(attention_mask, bool) & auto_mask
Node: 'model/transformer_block_1/multi_head_attention_1/and_1'
required broadcastable shapes
	 [[{{node model/transformer_block_1/multi_head_attention_1/and_1}}]] [Op:__inference_train_function_2726]

In [None]:
# Visualize training history
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

### Inference

In [None]:
print(tokenizer.word_index['<START>'], tokenizer.word_index['<END>'])

In [None]:
def generate_response(model, tokenizer, test_seq, max_length):
    test_seq = np.expand_dims(test_seq, axis=0)
    target_seq = np.array([[tokenizer.word_index['<START>']]])

    for _ in range(max_length):
        padded_target_seq = pad_sequences([target_seq[0]], maxlen=max_length, padding='post')
        predictions = model.predict([test_seq, padded_target_seq], verbose=0)
        predicted_index = np.argmax(predictions[0, len(target_seq[0]) - 1, :])

        if predicted_index == tokenizer.word_index['<END>']:
            break

        target_seq = np.append(target_seq, [[predicted_index]], axis=-1)

    generated_text = tokenizer.sequences_to_texts([target_seq[0]])[0]
    generated_text = generated_text.replace('<START>', '').replace('<END>', '').strip()

    return generated_text

In [None]:
max_length = 15

# Test examples
test_examples = [
    "How are you doing today?",
    "What is your name?",
    "Can you help me with my homework?",
    "What is the weather like?",
    "Tell me a joke.",
    "Who is the president of the United States?",
    "What is the capital of France?",
    "Do you like pizza?",
    "What is your favorite color?",
    "Goodbye!"
]

# Preprocess input text
input_text = [preprocess_text(text) for text in test_examples]

# Tokenize and pad the test examples
test_sequences = tokenizer.texts_to_sequences(input_text)
padded_test_sequences = pad_sequences(test_sequences, maxlen=max_encoder_seq_length, padding='pre', truncating='post')

# Generate responses
for test_seq in padded_test_sequences:
    response = generate_response(model, tokenizer, test_seq, max_length=15)
    print(f"Input: {test_examples[padded_test_sequences.tolist().index(test_seq.tolist())]}")
    print(f"Response: {response}")
    print("-" * 50)

### Save the Weights

In [None]:
data_dir = os.path.join(os.getcwd(), 'data')
weights_path = os.path.join(data_dir, 's2s_model_dd_tf210_weights_transformer.h5')
transformer_chatbot.save_weights(weights_path)