In [611]:
import numpy as np 
import json
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Embedding, LSTM, Dropout, Dense
from sklearn.model_selection import train_test_split
import random
import pickle
import re
from tensorflow.keras import layers , activations , models , preprocessing, utils

from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, TimeDistributed
from tensorflow.keras.models  import Model

In [557]:
# Load the data
with open('Intent.json') as file:
    data = json.load(file)

In [558]:
# For loop through each intents dictionary 
# lists of sentences and responses
questions  = []
responses = []

for intent in data["intents"]:
    for q in intent["text"]:
        questions.append(q)
        responses.append(intent["responses"][0])
# I am using the first response for each intent
# There is another way to pick a random response from the list of responses
# because in responses there are multiple responses for each intent 
# I got the main structure code from lecture notes

In [559]:
# Let's check the length of questions and responses
print(len(questions))      
print(len(responses))

143
143


In [560]:
# I need to clean the question and response list 
def clean(s):
    return re.sub(r"[^a-z0-9\s]","", s.lower())

In [566]:
# Apply the clean function to the questions and responses
questions = [clean(q) for q in questions]
answers   = [clean(r) for r in responses]

# I will  add <start> and <end> tokens to the decoder input/output in seq2seq models
# During training, the decoder learns to predict the next token based on the previous tokens
answers   = [f'<START> {answer} <END>' for answer in answers]

In [564]:
tokenizer = preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(questions + answers)
VOCAB_SIZE = len(tokenizer.word_index)+1
print('vocab size:', VOCAB_SIZE)

vocab size: 169


In [567]:
# Save the tokenizer For later use during inference
with open('tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [569]:
# I use the fitted tokenizer to convert the cleaned questions and answers
# (with start/end tokens) into sequences of integers.
questions_seq = tokenizer.texts_to_sequences(questions)
answers_seq = tokenizer.texts_to_sequences(answers)

In [618]:
# Padding Sequences:
# I will pad the sequences to ensure that all sequences have the same length.
max_len_questions = max(len(seq) for seq in questions_seq)
max_len_answers = max(len(seq) for seq in answers_seq)

print(f'Max question length: {max_len_questions}')
print(f'Max answer length: {max_len_answers}')

padded_questions = pad_sequences(questions_seq, maxlen=max_len_questions, padding='post')
padded_answers = pad_sequences(answers_seq, maxlen=max_len_answers, padding='post')

Max question length: 9
Max answer length: 21


In [619]:
encoder_input_data = np.array(padded_questions)
decoder_input_data = np.array(padded_answers)

In [620]:
shifted = [seq[1:] for seq in answers_seq]
padded_answers = pad_sequences(
    shifted,
    maxlen = max_len_answers,
    padding = "post"
)
decoder_target_data = to_categorical(padded_answers, num_classes=VOCAB_SIZE)
print(decoder_target_data.shape)

(143, 21, 169)


In [622]:
# Turn them into NumPy arrays for Keras
print(decoder_target_data.shape)    
print(encoder_input_data.shape)  
print(decoder_input_data.shape)

(143, 21, 169)
(143, 9)
(143, 21)


**Building the LSTM Chatbot Model**

In [630]:
EMBEDDING_DIM = 100
LATENT_DIM = 128    
BATCH_SIZE = 64      
EPOCHS = 5

In [631]:
# 1) Encoder inputs: a batch of padded question token IDs, shape = (batch, maxlen_questions)
encoder_inputs = Input(shape=(max_len_questions,), name="encoder_inputs")

# 2) Embed those IDs into dense vectors
enc_emb = Embedding(VOCAB_SIZE, EMBEDDING_DIM, mask_zero=True, name="encoder_embedding")(encoder_inputs)

# 3) Run an LSTM over the embeddings and grab only its final states
#    LATENT_DIM is the size of the hidden/cell vectors
_, state_h, state_c = LSTM(LATENT_DIM, return_state=True, name="encoder_lstm")(enc_emb)

# 4) Bundle the final LSTM states to pass to the decoder
encoder_states = [state_h, state_c]


In [632]:
# 1) Decoder inputs: padded answer token IDs (includes your <start> token up front)
decoder_inputs = Input(shape=(max_len_answers,), name="decoder_inputs")

# 2) Embed those IDs into vectors
dec_emb = Embedding(VOCAB_SIZE, EMBEDDING_DIM, mask_zero=True, name="decoder_embedding")(decoder_inputs)

# 3) Run an LSTM over the embeddings, seeding it with the encoder’s final states
#    return_sequences=True so we get an output at each time step
dec_lstm, _, _ = LSTM(
    LATENT_DIM,
    return_sequences=True,
    return_state=True,
    name="decoder_lstm"
)(dec_emb, initial_state=encoder_states)

# 4) Turn each LSTM output into a softmax over the vocab
decoder_outputs = Dense(VOCAB_SIZE, activation="softmax", name="decoder_dense")(dec_lstm)


In [633]:
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

# Print a summary of the model architecture and parameters
model.summary()

In [634]:
# Compile the model
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

In [635]:
print("Starting training...")
history = model.fit([encoder_input_data, decoder_input_data], # Input data (list)
                    decoder_target_data,                     # Target data
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    validation_split=0.2)                    # Use 20% of data for validation
print("Training complete.")

Starting training...
Epoch 1/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 395ms/step - accuracy: 0.0403 - loss: 5.1247 - val_accuracy: 0.2447 - val_loss: 5.1177
Epoch 2/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step - accuracy: 0.3346 - loss: 5.0960 - val_accuracy: 0.5140 - val_loss: 5.1065
Epoch 3/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step - accuracy: 0.7665 - loss: 5.0683 - val_accuracy: 0.5140 - val_loss: 5.0918
Epoch 4/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step - accuracy: 0.8079 - loss: 5.0303 - val_accuracy: 0.4877 - val_loss: 5.0688
Epoch 5/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step - accuracy: 0.8278 - loss: 4.9717 - val_accuracy: 0.4877 - val_loss: 5.0231
Training complete.


In [None]:
onehot_answers = utils.to_categorical(padded_answers , VOCAB_SIZE)
decoder_output_data = np.array(onehot_answers)

In [None]:
start_id = len(tokenizer.word_index) + 1
end_id   = start_id + 1
VOCAB = end_id + 1 

In [529]:
answer_seqs = [[start_id] + seq + [end_id] for seq in answer_seqs]
question_seqs = tokenizer.texts_to_sequences(answers)

In [530]:
max_q = max(len(s) for s in question_seqs)
max_a = max(len(s) for s in answer_seqs)
enc_in = pad_sequences(question_seqs, maxlen=max_q, padding="post")
dec_in = pad_sequences(answer_seqs,   maxlen=max_a, padding="post")

In [None]:
encoder_input_data = np.array(padded_questions)
decoder_input_data = np.array(padded_answers)

In [531]:
dec_tar = np.zeros_like(dec_in)
for i, seq in enumerate(dec_in):
    dec_tar[i, :-1] = seq[1:]
dec_tar = to_categorical(dec_tar, num_classes=VOCAB)

In [533]:
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, TimeDistributed
from tensorflow.keras.models  import Model

In [542]:
# 1) Encoder
enc_inputs = Input(shape=(None,), name="encoder_inputs")
# turn word-IDs → 64-d vectors
enc_emb    = Embedding(input_dim=VOCAB, output_dim=100, mask_zero=True)(enc_inputs)
# run through an LSTM, keep its final h/c
_, state_h, state_c = LSTM(128, return_state=True, name="encoder_lstm")(enc_emb)
encoder_states = [state_h, state_c]

# 2) Decoder
dec_inputs = Input(shape=(None,), name="decoder_inputs")
dec_emb    = Embedding(input_dim=VOCAB, output_dim=100, mask_zero=True)(dec_inputs)
# LSTM returns a full sequence plus new h/c, seeded by the encoder’s states
dec_lstm, _, _ = LSTM(
    128,
    return_sequences=True,
    return_state=True,
    name="decoder_lstm"
)(dec_emb, initial_state=encoder_states)
# project each time-step to a softmax over the vocab
dec_outputs = Dense(VOCAB, activation="softmax", name="decoder_dense")(dec_lstm)

# 3) Build & compile
model = Model([enc_inputs, dec_inputs], dec_outputs)
model.compile(
    optimizer="adam",   
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

In [543]:
# 4) Train
model.fit(
    [enc_in, dec_in],  # encoder & decoder inputs
    dec_tar,           # one-hot “next word” targets
    batch_size=64,
    epochs=100,
    validation_split=0.1
)

Epoch 1/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 464ms/step - accuracy: 0.0357 - loss: 5.1320 - val_accuracy: 0.1136 - val_loss: 5.1219
Epoch 2/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step - accuracy: 0.1260 - loss: 5.1058 - val_accuracy: 0.1111 - val_loss: 5.1029
Epoch 3/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step - accuracy: 0.1155 - loss: 5.0712 - val_accuracy: 0.1111 - val_loss: 5.0716
Epoch 4/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step - accuracy: 0.1111 - loss: 5.0168 - val_accuracy: 0.1111 - val_loss: 5.0116
Epoch 5/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step - accuracy: 0.1111 - loss: 4.9223 - val_accuracy: 0.1111 - val_loss: 4.8768
Epoch 6/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step - accuracy: 0.1111 - loss: 4.7336 - val_accuracy: 0.1111 - val_loss: 4.5965
Epoch 7/100
[1m2/2[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1cd639411b0>

In [505]:
VOCAB_SIZE = len(tokenizer.word_index) + 1
print("New vocab size:", VOCAB_SIZE)


New vocab size: 170


In [496]:
max_q = max(len(x) for x in Q)
max_a = max(len(x) for x in A)
enc_in = pad_sequences(Q, maxlen=max_q, padding="post")
dec_in = pad_sequences(A, maxlen=max_a, padding="post")

In [497]:

dec_tar = np.zeros_like(dec_in)
for i, seq in enumerate(dec_in):
    dec_tar[i, :-1] = seq[1:]
VOCAB = len(tokenizer.word_index) + 1
dec_tar = to_categorical(dec_tar, num_classes=VOCAB)


In [517]:
# Encoder
enc_input  = Input(shape=(None,))
enc_embed  = Embedding(VOCAB, 32, mask_zero=True)(enc_input)
_, h, c    = LSTM(128, return_state=True)(enc_embed)
enc_states = [h, c]

In [518]:
# Encoder
enc_input  = Input(shape=(None,))
enc_embed  = Embedding(VOCAB, 32, mask_zero=True)(enc_input)
_, h, c    = LSTM(128, return_state=True)(enc_embed)
enc_states = [h, c]

In [519]:
# Decoder
dec_input  = Input(shape=(None,))
dec_embed  = Embedding(VOCAB, 32, mask_zero=True)(dec_input)
dec_lstm, _, _ = LSTM(128, return_sequences=True, return_state=True)(
    dec_embed, initial_state=enc_states
)
# Apply Dense directly on the 3D output (batch, time, features):
dec_out = Dense(
    VOCAB_SIZE,
    activation='softmax',
    name='decoder_dense'
)(dec_lstm)


In [None]:
model = Model([enc_input, dec_input], dec_out)
model.compile("rmsprop", "categorical_crossentropy", metrics=["accuracy"])

In [None]:
# 7) Train
model.fit([enc_in, dec_in], dec_tar, batch_size=64, epochs=100, validation_split=0.1)


Epoch 1/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 139ms/step - accuracy: 0.0654 - loss: 5.1328 - val_accuracy: 0.4667 - val_loss: 5.1252
Epoch 2/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.5812 - loss: 5.1126 - val_accuracy: 0.4720 - val_loss: 5.1048
Epoch 3/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - accuracy: 0.6662 - loss: 5.0731 - val_accuracy: 0.4720 - val_loss: 5.0329
Epoch 4/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.6691 - loss: 4.9568 - val_accuracy: 0.4720 - val_loss: 4.7274
Epoch 5/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.6661 - loss: 4.6611 - val_accuracy: 0.4720 - val_loss: 4.5621
Epoch 6/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.6712 - loss: 4.3711 - val_accuracy: 0.4720 - val_loss: 4.7922
Epoch 7/100
[1m4/4[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1cd3cdec8b0>

In [458]:
# 1) Clean questions
clean_questions = [ clean_text(q) for q in training_sentences ]

# 2) Clean responses
clean_responses = [ clean_text(r) for r in responses ]

# 3) Add your special tokens around each response
target_texts    = [f"<start> {clean_text(r)} <end>"          for r   in clean_responses]




In [459]:
print(len(training_sentences))      
print(len(responses))

143
143


In [460]:
print("Training sentences: ", training_sentences[:10])  
print("Responses: ", responses[:10])

Training sentences:  ['Hi', 'Hi there', 'Hola', 'Hello', 'Hello there', 'Hya', 'Hya there', 'My user is Adam', 'This is Adam', 'I am Adam']
Responses:  ['Hello human, please tell me your GeniSys user', 'Hi human, please tell me your GeniSys user', 'Hola human, please tell me your GeniSys user', 'Hello human, please tell me your GeniSys user', 'Hola human, please tell me your GeniSys user', 'Hello human, please tell me your GeniSys user', 'Hi human, please tell me your GeniSys user', 'OK! Hola <HUMAN>, how can I help you?', 'Cool! Hello <HUMAN>, what can I do for you?', 'Cool! Hello <HUMAN>, what can I do for you?']


In [461]:
print('First 5 tags:', labels[:5])
print('First 5 sentences:', training_sentences[:5])
print('First 5 responses:', responses[:5])
print('Training labels:', training_labels[:5])

First 5 tags: ['Greeting', 'GreetingResponse', 'CourtesyGreeting', 'CourtesyGreetingResponse', 'CurrentHumanQuery']
First 5 sentences: ['Hi', 'Hi there', 'Hola', 'Hello', 'Hello there']
First 5 responses: ['Hello human, please tell me your GeniSys user', 'Hi human, please tell me your GeniSys user', 'Hola human, please tell me your GeniSys user', 'Hello human, please tell me your GeniSys user', 'Hola human, please tell me your GeniSys user']
Training labels: ['Greeting', 'Greeting', 'Greeting', 'Greeting', 'Greeting']


In [484]:

tokenizer = Tokenizer()  
tokenizer.fit_on_texts(training_sentences + responses)
question_seqs = tokenizer.texts_to_sequences(training_sentences)
answer_seqs   = tokenizer.texts_to_sequences(responses)

word_index = tokenizer.word_index
print(f'{len(word_index)} unique tokens.')
start_token_index = tokenizer.word_index['start']
end_token_index   = tokenizer.word_index['end']

315 unique tokens.


KeyError: 'start'

In [463]:
max_q = max(len(s) for s in question_seqs)
max_a = max(len(s) for s in answer_seqs)

print(f"Max question length: {max_q}")
print(f"Max answer length: {max_a}")    

padded_questions= pad_sequences(question_seqs, maxlen=max_q, padding='post')
padded_answers = pad_sequences(answer_seqs,   maxlen=max_a, padding='post')


Max question length: 9
Max answer length: 71


In [464]:
VOCAB_SIZE = len(tokenizer.word_index) + 1
print("New vocab size:", VOCAB_SIZE)


New vocab size: 316


In [465]:
encoder_input_data = np.array(padded_questions)

In [None]:
decoder_input_data = np.array(padded_answers)

In [467]:
from tensorflow.keras.utils import to_categorical

# Create decoder target data by shifting padded_answers
decoder_target_data = np.zeros_like(decoder_input_data)
for i, seq in enumerate(decoder_input_data):
  shifted = seq[1:] 
  decoder_target_data[i, :-1] = shifted 
  # Last element remains 0 (padding)

# One-hot encode the decoder target data
decoder_target_data = to_categorical(decoder_target_data, num_classes=VOCAB_SIZE)

print("Encoder Input Shape:", encoder_input_data.shape)
print("Decoder Input Shape:", decoder_input_data.shape)
print("Decoder Target Shape:", decoder_target_data.shape)

Encoder Input Shape: (143, 9)
Decoder Input Shape: (143, 71)
Decoder Target Shape: (143, 71, 316)


In [468]:
EMBEDDING_DIM = 100  
LATENT_DIM = 128     
BATCH_SIZE = 64     
EPOCHS = 100         

In [469]:
# Encoder Input Layer
# Takes sequences of integer IDs with length maxlen_questions
encoder_inputs = Input(shape=(None,), name='encoder_inputs')

# Embedding Layer
# Converts integer sequences to dense vectors of EMBEDDING_DIM
# mask_zero=True tells the layer to ignore padding (0s) in subsequent layers
encoder_embedding_layer = Embedding(VOCAB_SIZE, EMBEDDING_DIM, mask_zero=True, name='encoder_embedding')
encoder_embedding = encoder_embedding_layer(encoder_inputs)

# LSTM Layer
# Processes the embedded sequence
# LATENT_DIM is the number of LSTM units (dimensionality of hidden/cell state)
# return_state=True ensures the final hidden state (state_h) and cell state (state_c) are returned
encoder_lstm = LSTM(LATENT_DIM, return_state=True, name='encoder_lstm')
# We don't need the per-timestep outputs of the encoder, only the final states
_, state_h, state_c = encoder_lstm(encoder_embedding)

# The encoder_states contain the final hidden and cell state, capturing the input sequence context
encoder_states = [state_h, state_c]

In [470]:
# Decoder Input Layer
# Takes sequences of integer IDs with length maxlen_answers (including <start>)
decoder_inputs = Input(shape=(None,), name='decoder_inputs')

# Embedding Layer (can reuse encoder's or define a new one)
# Using a separate layer allows learning different embeddings for input vs output if needed
decoder_embedding_layer = Embedding(VOCAB_SIZE, EMBEDDING_DIM, mask_zero=True, name='decoder_embedding')
decoder_embedding = decoder_embedding_layer(decoder_inputs)

# Decoder LSTM Layer
# return_sequences=True is essential because we need an output at each timestep for the Dense layer
# return_state=True is needed for inference later, although states are not directly used in this training graph connection
decoder_lstm = LSTM(LATENT_DIM, return_sequences=True, return_state=True, name='decoder_lstm')

# Crucially, the decoder LSTM is initialized with the encoder_states
# This provides the context from the input sequence
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)

# Dense Output Layer
# Projects the LSTM outputs to the vocabulary size
# Softmax activation provides a probability distribution over the target vocabulary for each timestep
decoder_dense = Dense(VOCAB_SIZE, activation='softmax', name='decoder_dense')
decoder_outputs = decoder_dense(decoder_outputs)

In [471]:
from tensorflow.keras.models import Model

In [472]:
# Define the complete model for training
# It takes encoder_inputs and decoder_inputs and outputs decoder_outputs
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

# Print a summary of the model architecture and parameters
model.summary()

In [473]:
# Compile the model
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

In [474]:
print("Starting training...")
history = model.fit([encoder_input_data, decoder_input_data], # Input data (list)
                    decoder_target_data,                     # Target data
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    validation_split=0.2)                
# Use 20% of data for validation
print("Training complete.")

Starting training...
Epoch 1/100


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 565ms/step - accuracy: 0.0201 - loss: 5.7522 - val_accuracy: 0.7596 - val_loss: 5.7513
Epoch 2/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 197ms/step - accuracy: 0.9268 - loss: 5.7362 - val_accuracy: 0.7596 - val_loss: 5.7474
Epoch 3/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step - accuracy: 0.9239 - loss: 5.7219 - val_accuracy: 0.7596 - val_loss: 5.7428
Epoch 4/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step - accuracy: 0.9259 - loss: 5.7047 - val_accuracy: 0.7596 - val_loss: 5.7367
Epoch 5/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step - accuracy: 0.9270 - loss: 5.6857 - val_accuracy: 0.7596 - val_loss: 5.7266
Epoch 6/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step - accuracy: 0.9265 - loss: 5.6528 - val_accuracy: 0.7596 - val_loss: 5.7046
Epoch 7/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━

In [479]:
model.save('chatbot_model.keras')

In [480]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input

# 1) Grab your original embedding + LSTM layers by name:
emb_layer  = model.get_layer('encoder_embedding')  # the Embedding you used on encoder side
lstm_layer = model.get_layer('encoder_lstm')       # the LSTM you used with return_state=True

# 2) Re-define a fresh Input for inference:
enc_inputs = Input(shape=(None,), name='enc_input_inf')

# 3) Re-apply the same layers to that input:
enc_emb    = emb_layer(enc_inputs)
# Since you originally did something like
#   _, state_h, state_c = LSTM(..., return_state=True)(enc_emb)
# you can do the same here:
_, state_h, state_c = lstm_layer(enc_emb)

# 4) Build the encoder‐only model that outputs the states:
encoder_model = Model(enc_inputs, [state_h, state_c])


In [482]:
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
# 1) Grab the trained decoder‐side layers by name:
dec_emb_layer  = model.get_layer("decoder_embedding")
dec_lstm_layer = model.get_layer("decoder_lstm")
dec_dense      = model.get_layer("decoder_dense")   # ← here

# 2) Build the single‐step decoder for inference:
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model

decoder_input_token = Input(shape=(1,),   name="dec_input_token")
state_input_h       = Input(shape=(LATENT_DIM,), name="dec_state_h")
state_input_c       = Input(shape=(LATENT_DIM,), name="dec_state_c")

dec_emb_inf = dec_emb_layer(decoder_input_token)
dec_outputs, h_new, c_new = dec_lstm_layer(
    dec_emb_inf,
    initial_state=[state_input_h, state_input_c]
)

# apply your Dense (which will run over the time axis automatically)
token_probs = dec_dense(dec_outputs)

decoder_model = Model(
    [decoder_input_token, state_input_h, state_input_c],
    [token_probs, h_new, c_new]
)



In [483]:
import numpy as np
start_token_index = tokenizer.word_index['start']
end_token_index   = tokenizer.word_index['end']

# Reverse map from integer → word (so you can turn IDs back into strings)
def decode_sequence(input_text):
    # 1) Encode the input question
    seq    = tokenizer.texts_to_sequences([clean_text(input_text)])
    padded = pad_sequences(seq, maxlen=max_q, padding="post")
    h, c   = encoder_model.predict(padded)

    # 2) Seed the decoder with the bare “start” token ID
    current_token = np.array([[ start_token_index ]])
    decoded = []

    # 3) Step by step generation
    for _ in range(max_a):
        probs, h, c = decoder_model.predict([current_token, h, c])
        next_id     = probs[0, -1].argmax()

        # If we hit the “end” token, stop early
        if next_id == end_token_index:
            break

        # Otherwise turn that ID back into a word
        decoded.append(reverse_word_index[next_id])

        # And feed it back into the next step
        current_token = np.array([[ next_id ]])

    return " ".join(decoded)

# Now this will work without KeyErrors:
print(decode_sequence("Hello there!"))


KeyError: 'start'

In [None]:
import random
intent2resps = {
  intent["intent"]: intent["responses"]
  for intent in data["intents"]
}

def predict_intent(text):
    seq = tokenizer.texts_to_sequences([text])
    p   = pad_sequences(seq, maxlen=maxlen, padding='post')
    pred= model.predict(p)[0]
    idx = pred.argmax()
    intent_name = le.inverse_transform([idx])[0]
    conf = pred[idx]
    return intent_name, conf    

def bot_response(text):
    intent, conf = predict_intent(text)
    if conf < 0.3:
        return "Sorry, I didn’t get that. Can you rephrase?"
    return random.choice(intent2resps[intent])

# example
print(bot_response("Hi"))

ValueError: Layer "functional_5" expects 2 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor 'data:0' shape=(1, 9) dtype=int32>]

In [None]:
# 2. Tokenize using the loaded tokenizer
sequence = tokenizer.texts_to_sequences([text]) # Input must be a list

# 3. Pad the sequence to maxlen_questions
padded_sequence = pad_sequences(sequence, maxlen=maxlen_questions, padding='post')

return padded_sequence # Shape: (1, maxlen_questions)

In [None]:
# It is short list of each intent name only once.
len(labels)

22

In [None]:
# One label for every training sentence
len(training_labels)

143

In [None]:
# show the number of sentences and labels
print('Number of sentences:', len(training_sentences))
print('Number of labels:', len(training_labels))

Number of sentences: 143
Number of labels: 143


In [None]:
# Let's create a tokenizer
tokenizer = Tokenizer(oov_token="<OOV>")
tokenizer.fit_on_texts(training_sentences)

In [None]:
# Convert each sentence into a list of integers.
sequences = tokenizer.texts_to_sequences(training_sentences)
# find the length of the longest sentence
maxlen    = max(len(seq) for seq in sequences)

In [None]:
X = pad_sequences(sequences, maxlen=maxlen, padding='post')
vocab_size = len(tokenizer.word_index) + 1
print(f'Vocab size: {vocab_size}, maxlen: {maxlen}')

Vocab size: 118, maxlen: 9


In [None]:
   # Save the tokenizer
with open('tokenizer.pkl', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
encoder_input_data = np.array(X)

In [None]:
print('Encoder Input Shape:', encoder_input_data.shape)

Encoder Input Shape: (143, 9)


In [None]:
# turn my intent names into integer codes.
le    = LabelEncoder()
y_int = le.fit_transform(training_labels)      
y     = to_categorical(y_int)     

In [None]:
num_classes = y.shape[1]
print('Number of classes:', num_classes)

Number of classes: 22


In [None]:
"""
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.1, random_state=42
)
print('Training size:', len(X_train))
print('Validation size:', len(X_val))
"""

Training size: 128
Validation size: 15


In [None]:
EMBEDDING_DIM = 32
LATENT_DIM = 128     
BATCH_SIZE = 64    
EPOCHS = 100 

In [None]:
from tensorflow.keras.layers import Input

# Encoder Input Layer
# Takes sequences of integer IDs with length maxlen_questions
encoder_inputs = Input(shape=(maxlen,), name='encoder_inputs')

# Embedding Layer
# Converts integer sequences to dense vectors of EMBEDDING_DIM
# mask_zero=True tells the layer to ignore padding (0s) in subsequent layers
encoder_embedding_layer = Embedding(vocab_size, EMBEDDING_DIM, mask_zero=True, name='encoder_embedding')
encoder_embedding = encoder_embedding_layer(encoder_inputs)

# LSTM Layer
# Processes the embedded sequence
# LATENT_DIM is the number of LSTM units (dimensionality of hidden/cell state)
# return_state=True ensures the final hidden state (state_h) and cell state (state_c) are returned
encoder_lstm = LSTM(LATENT_DIM, return_state=True, name='encoder_lstm')
# We don't need the per-timestep outputs of the encoder, only the final states
_, state_h, state_c = encoder_lstm(encoder_embedding)

# The encoder_states contain the final hidden and cell state, capturing the input sequence context
encoder_states = [state_h, state_c]

In [None]:
# Decoder Input Layer
# Takes sequences of integer IDs with length maxlen_answers (including <start>)
decoder_inputs = Input(shape=(maxlen_answers,), name='decoder_inputs')

# Embedding Layer (can reuse encoder's or define a new one)
# Using a separate layer allows learning different embeddings for input vs output if needed
decoder_embedding_layer = Embedding(VOCAB_SIZE, EMBEDDING_DIM, mask_zero=True, name='decoder_embedding')
decoder_embedding = decoder_embedding_layer(decoder_inputs)

# Decoder LSTM Layer
# return_sequences=True is essential because we need an output at each timestep for the Dense layer
# return_state=True is needed for inference later, although states are not directly used in this training graph connection
decoder_lstm = LSTM(LATENT_DIM, return_sequences=True, return_state=True, name='decoder_lstm')

# Crucially, the decoder LSTM is initialized with the encoder_states
# This provides the context from the input sequence
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)

# Dense Output Layer
# Projects the LSTM outputs to the vocabulary size
# Softmax activation provides a probability distribution over the target vocabulary for each timestep
decoder_dense = Dense(VOCAB_SIZE, activation='softmax', name='decoder_dense')
decoder_outputs = decoder_dense(decoder_outputs)

In [None]:
# np.argmax(..., axis=1) finds the index of the 1 in each row
# turning each one hot vector back into a single integer label
# Since I am going to use sparse_categorical_crossentropy as the loss function
# it expects integer class IDs, not one hot
"""y_train = np.argmax(y_train,   axis=1)
y_val   = np.argmax(y_val,     axis=1)  
print('Training set shape:', y_train.shape)"""

Training set shape: (128,)


In [None]:
embed_dim = 32
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embed_dim, input_length=maxlen, mask_zero=True),
    LSTM(64),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])


model.compile(loss='sparse_categorical_crossentropy', 
              optimizer='adam', metrics=['accuracy'])

model.summary()



In [None]:
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=32
)


Epoch 1/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 100ms/step - accuracy: 0.0594 - loss: 3.0907 - val_accuracy: 0.0667 - val_loss: 3.0924
Epoch 2/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.1250 - loss: 3.0853 - val_accuracy: 0.0000e+00 - val_loss: 3.0913
Epoch 3/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.2917 - loss: 3.0805 - val_accuracy: 0.0000e+00 - val_loss: 3.0903
Epoch 4/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.2750 - loss: 3.0749 - val_accuracy: 0.0000e+00 - val_loss: 3.0888
Epoch 5/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.3552 - loss: 3.0657 - val_accuracy: 0.0000e+00 - val_loss: 3.0873
Epoch 6/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - accuracy: 0.3729 - loss: 3.0564 - val_accuracy: 0.0000e+00 - val_loss: 3.0859
Epoch 7/50
[1m4/4[0m [

In [None]:
import random
intent2resps = {
  intent["intent"]: intent["responses"]
  for intent in data["intents"]
}

def predict_intent(text):
    seq = tokenizer.texts_to_sequences([text])
    p   = pad_sequences(seq, maxlen=maxlen, padding='post')
    pred= model.predict(p)[0]
    idx = pred.argmax()
    intent_name = le.inverse_transform([idx])[0]
    conf = pred[idx]
    return intent_name, conf    

def bot_response(text):
    intent, conf = predict_intent(text)
    if conf < 0.3:
        return "Sorry, I didn’t get that. Can you rephrase?"
    return random.choice(intent2resps[intent])

# example
print(bot_response("Hi"))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
Sorry, I didn’t get that. Can you rephrase?
