In [20]:
import numpy as np
import json
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.optimizers import Adam

In [29]:
# -----------------------------------------
# 1. Training Data
# -----------------------------------------
questions = [
    "hi", "hello", "how are you", "what is your name",
    "bye", "thank you", "thanks"
]

answers = [
    "Hello! How can I help you?",
    "Hello! How can I help you?",
    "I am fine! How are you?",
    "I am your chatbot created using Deep Learning + FastAPI",
    "Goodbye! Have a nice day!",
    "You're welcome!",
    "You're welcome!"
]

# -----------------------------------------

In [30]:
# 2. Tokenization
# -----------------------------------------
tokenizer = Tokenizer()
tokenizer.fit_on_texts(questions + answers)

vocab_size = len(tokenizer.word_index) + 1


In [31]:
# Convert text → sequences
X = tokenizer.texts_to_sequences(questions)
y = tokenizer.texts_to_sequences(answers)



In [32]:
# Pad sequences
max_len = 10
X = pad_sequences(X, maxlen=max_len)
y = pad_sequences(y, maxlen=max_len)

y = np.array(y)


In [33]:
# -----------------------------------------
# 3. Build Model
# -----------------------------------------
model = Sequential()
model.add(Embedding(vocab_size, 64, input_length=max_len))
model.add(LSTM(64))
model.add(Dense(vocab_size, activation="softmax"))

model.compile(loss="sparse_categorical_crossentropy", optimizer=Adam(0.001))

# -----------------------------------------
# 4. Train
# -----------------------------------------


In [34]:
# -----------------------------------------
# 4. Train
# -----------------------------------------
y = np.expand_dims(y[:, 0], axis=-1)  # first token prediction

model.fit(X, y, epochs=50)


Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step - loss: 3.4424
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 143ms/step - loss: 3.4032
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step - loss: 3.3626
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step - loss: 3.3178
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step - loss: 3.2661
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 136ms/step - loss: 3.2050
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 187ms/step - loss: 3.1316
Epoch 8/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step - loss: 3.0428
Epoch 9/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 110ms/step - loss: 2.9344
Epoch 10/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 107ms/step - loss: 2.8019
Epoch 11/50
[1m

<keras.src.callbacks.history.History at 0x1b5b2d8a380>

In [35]:
# -----------------------------------------
# 5. Save Model + Tokenizer
# -----------------------------------------
model.save("model/chatbot_model.h5")

#with open("model/tokenizer.json", "w") as f:
  #  f.write(tokenizer.to_json())

with open("model/tokenizer.json") as f:
    tokenizer_data = json.load(f)

# Convert dict to JSON string
tokenizer_json = json.dumps(tokenizer_data)

#tokenizer = tokenizer_from_json(tokenizer_json)


print("Model training completed and saved!")



Model training completed and saved!
