<a href="https://colab.research.google.com/github/MammadovN/Machine_Learning/blob/main/projects/04_natural_language_processing/chatbot/seq2seq_chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install -q tensorflow nltk numpy

In [4]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Attention, Concatenate
import nltk
from nltk.tokenize import word_tokenize

nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [5]:
conversations = [
    "Hello|Hi there!",
    "How are you?|I'm good, thank you!",
    "What is your name?|I am a chatbot.",
    "Thank you|You're welcome!",
    "Goodbye|See you later!",
    "What do you like?|I like helping people.",
    "Tell me a joke.|Why don't skeletons fight each other? They don't have the guts!",
    "What's the weather like?|I'm not sure, but I hope it's sunny!",
    "Can you help me?|Of course! What do you need?",
    "Where are you from?|I exist in the digital world!"
]


questions, answers = [], []
for line in conversations:
    q, a = line.split('|')
    questions.append(q)
    answers.append(a)


tokenizer = Tokenizer(oov_token='<unk>')
tokenizer.fit_on_texts(questions + answers)

START_TOKEN = '<start>'
END_TOKEN   = '<end>'

for tok in (START_TOKEN, END_TOKEN):
    if tok not in tokenizer.word_index:
        idx = len(tokenizer.word_index) + 1
        tokenizer.word_index[tok] = idx
        tokenizer.index_word[idx] = tok


q_seqs = tokenizer.texts_to_sequences(questions)
a_seqs = tokenizer.texts_to_sequences(answers)

max_len_q = max(len(s) for s in q_seqs)
max_len_a = max(len(s) for s in a_seqs) + 2  # start/end için +2

q_padded = pad_sequences(q_seqs, maxlen=max_len_q, padding='post')
a_padded = pad_sequences(
    [[tokenizer.word_index[START_TOKEN]] + s + [tokenizer.word_index[END_TOKEN]]
     for s in a_seqs],
    maxlen=max_len_a, padding='post'
)

vocab_size = len(tokenizer.word_index) + 1


In [6]:
# Encoder
enc_inputs   = Input(shape=(None,), name="encoder_inputs")
enc_emb      = Embedding(input_dim=vocab_size, output_dim=256, name="encoder_embedding")(enc_inputs)
enc_lstm     = LSTM(256, return_sequences=True, return_state=True, name="encoder_lstm")
enc_outs, enc_h, enc_c = enc_lstm(enc_emb)
encoder_states = [enc_h, enc_c]

# Decoder
dec_inputs  = Input(shape=(None,), name="decoder_inputs")
dec_emb     = Embedding(input_dim=vocab_size, output_dim=256, name="decoder_embedding")(dec_inputs)
dec_lstm    = LSTM(256, return_sequences=True, return_state=True, name="decoder_lstm")
dec_outs, _, _ = dec_lstm(dec_emb, initial_state=encoder_states)

# Attention + Concat + Dense
attn_layer    = Attention(name="attention_layer")
attn_outs     = attn_layer([dec_outs, enc_outs])                         # (batch, T_dec, 256)
concat_layer  = Concatenate(axis=-1, name="concat_layer")([dec_outs, attn_outs])  # (batch, T_dec, 512)
dense_layer   = Dense(vocab_size, activation='softmax', name="output_dense")
dec_preds     = dense_layer(concat_layer)

# Full training modeli
model = Model([enc_inputs, dec_inputs], dec_preds, name="seq2seq_with_attention")
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [7]:
decoder_input_data  = a_padded[:, :-1]
decoder_target_data = a_padded[:, 1:]

model.fit(
    [q_padded, decoder_input_data],
    np.expand_dims(decoder_target_data, -1),
    batch_size=16,
    epochs=50,
    verbose=1
)


encoder_model = Model(
    inputs=enc_inputs,
    outputs=[enc_outs, enc_h, enc_c],
    name="encoder_inference"
)

# --- Decoder
dec_input_token = Input(shape=(1,),   name="dec_input_token")
dec_state_h     = Input(shape=(256,), name="dec_state_h")
dec_state_c     = Input(shape=(256,), name="dec_state_c")
enc_outs_inf    = Input(shape=(None, 256), name="enc_outs_inf")

# Embedding + LSTM
dec_emb_inf     = dec_emb = model.get_layer("decoder_embedding")(dec_input_token)
dec_outs_inf, dec_h_inf, dec_c_inf = model.get_layer("decoder_lstm")(
    dec_emb_inf, initial_state=[dec_state_h, dec_state_c]
)

# Attention + Concat + Dense
attn_inf       = model.get_layer("attention_layer")([dec_outs_inf, enc_outs_inf])
concat_inf     = model.get_layer("concat_layer")([dec_outs_inf, attn_inf])
dec_preds_inf  = model.get_layer("output_dense")(concat_inf)

decoder_model = Model(
    inputs=[dec_input_token, enc_outs_inf, dec_state_h, dec_state_c],
    outputs=[dec_preds_inf, dec_h_inf, dec_c_inf],
    name="decoder_inference"
)

Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step - accuracy: 0.0083 - loss: 4.1344
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step - accuracy: 0.5083 - loss: 4.0715
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 241ms/step - accuracy: 0.5083 - loss: 4.0030
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 228ms/step - accuracy: 0.5000 - loss: 3.9157
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 280ms/step - accuracy: 0.5000 - loss: 3.7939
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step - accuracy: 0.5000 - loss: 3.6153
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 235ms/step - accuracy: 0.5000 - loss: 3.3481
Epoch 8/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 236ms/step - accuracy: 0.5000 - loss: 2.9763
Epoch 9/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

In [8]:
def chatbot_response(user_input):
    seq = tokenizer.texts_to_sequences([user_input])
    pad = pad_sequences(seq, maxlen=max_len_q, padding='post')
    enc_outs, h, c = encoder_model.predict(pad)

    target_seq = np.array([[tokenizer.word_index[START_TOKEN]]])
    states = [h, c]
    response_tokens = []

    for _ in range(max_len_a):
        preds, h, c = decoder_model.predict([target_seq, enc_outs] + states)
        idx = np.argmax(preds[0, -1, :])
        if idx == tokenizer.word_index[END_TOKEN]:
            break
        response_tokens.append(tokenizer.index_word.get(idx, '<unk>'))
        target_seq = np.array([[idx]])
        states = [h, c]

    return ' '.join(response_tokens)

In [11]:
print("Chatbot is active! Type 'exit' to quit.")
while True:
    user_text = input("You: ")
    if user_text.lower() in ('exit'):
        print("Chatbot: Goodbye!")
        break
    print("Chatbot:", chatbot_response(user_text))

Chatbot is active! Type 'exit' to quit.
You: bye
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
Chatbot: hi
You: goodbye
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
Chatbot: hi there
You: exit
Chatbot: Goodbye!
