### Import Libraries

In [42]:
import json
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer, tokenizer_from_json
from tensorflow.keras.preprocessing.sequence import pad_sequences
from flask import Flask, request, jsonify
from tensorflow.keras.models import load_model
from threading import Thread
import requests

In [26]:
# Read the QnA dataset from its .txt file, keeping one list entry per line
# (trailing newlines included).
with open("qna_dataset.txt", "r", encoding="utf-8") as dataset_file:
    lines = list(dataset_file)


### Load Dataset 

In [27]:
def _parse_qna_pairs(raw_lines):
    """Extract (questions, answers) from alternating "User:" / "Bot:" lines.

    A pair is a line starting with "User:" immediately followed by a line
    starting with "Bot:". The scan walks line by line instead of assuming a
    fixed three-line record, so an extra or missing separator line only
    affects the record it touches rather than misaligning every later pair.

    Args:
        raw_lines: list of text lines as read from the dataset file.

    Returns:
        A tuple ``(questions, answers)`` of equally long lists of strings with
        the speaker prefix removed and surrounding whitespace stripped.
    """
    user_tag, bot_tag = "User:", "Bot:"
    parsed_questions, parsed_answers = [], []
    idx = 0
    while idx + 1 < len(raw_lines):
        question_line, answer_line = raw_lines[idx], raw_lines[idx + 1]
        if question_line.startswith(user_tag) and answer_line.startswith(bot_tag):
            # Slice off only the leading tag; str.replace would also delete
            # any later "User:" / "Bot:" that is part of the actual text.
            parsed_questions.append(question_line[len(user_tag):].strip())
            parsed_answers.append(answer_line[len(bot_tag):].strip())
            idx += 2
        else:
            idx += 1
    return parsed_questions, parsed_answers


questions, answers = _parse_qna_pairs(lines)

### Tokenization

In [28]:
# Tokenization: fit a single vocabulary over both questions and answers.
max_len = 20  # fixed length passed as `maxlen` when padding sequences
tokenizer = Tokenizer()
tokenizer.fit_on_texts([*questions, *answers])
# One extra slot on top of the learned words (index 0 is treated as padding
# when predictions are decoded).
vocab_size = 1 + len(tokenizer.word_index)


### Build Padded Training Sequences

In [29]:

# Turn questions (inputs) and answers (targets) into integer id sequences,
# zero-padded at the end up to max_len.
# NOTE(review): `truncating` is left at its default, so check which end of an
# over-long sequence gets cut — confirm this is the intended side.
question_ids = tokenizer.texts_to_sequences(questions)
answer_ids = tokenizer.texts_to_sequences(answers)
X = pad_sequences(question_ids, maxlen=max_len, padding="post")
y = pad_sequences(answer_ids, maxlen=max_len, padding="post")

# First token id of every padded answer (0 for an empty row).
# NOTE(review): the training call visible in this notebook fits on `y`, not
# on `y_seq` — confirm whether this array is still needed.
y_seq = np.array([row[0] if len(row) > 0 else 0 for row in y])


In [None]:
# Per-position token classifier: each input position is embedded, run through
# the LSTM, and mapped to a softmax distribution over the vocabulary.
model = Sequential([
    # `input_length` is intentionally not passed: it is deprecated in current
    # Keras releases, and the sequence length is taken from the data.
    Embedding(input_dim=vocab_size, output_dim=128),
    LSTM(128, return_sequences=True),  # emit an output for every time step
    Dense(vocab_size, activation="softmax"),
])

In [31]:
# Targets are integer token ids, hence the sparse categorical cross-entropy.
# NOTE(review): `y` is zero-padded, so padded positions presumably count
# towards the reported loss/accuracy — confirm this is acceptable.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
model.fit(X, y, epochs=100)


Epoch 1/100
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 240ms/step - accuracy: 0.3427 - loss: 7.6209
Epoch 2/100
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 232ms/step - accuracy: 0.3653 - loss: 4.7383
Epoch 3/100
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 231ms/step - accuracy: 0.3649 - loss: 4.5859
Epoch 4/100
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 234ms/step - accuracy: 0.3843 - loss: 4.2903
Epoch 5/100
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 234ms/step - accuracy: 0.4216 - loss: 4.0570
Epoch 6/100
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 235ms/step - accuracy: 0.4349 - loss: 3.8212
Epoch 7/100
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 233ms/step - accuracy: 0.4521 - loss: 3.6373
Epoch 8/100
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 239ms/step - accuracy: 0.4831 - loss: 3.4957
Epoch 9/100
[1m55/55[0

<keras.src.callbacks.history.History at 0x1755947d0>

In [32]:
# Save the trained model, plus the tokenizer and the preprocessing settings
# needed to reuse it later.
model.save("chatbot_model.h5")

preprocessing_state = {
    "tokenizer_config": tokenizer.to_json(),
    "vocab_size": vocab_size,
    "max_len": max_len,
}
with open("preprocessed_data.json", "w", encoding="utf-8") as f:
    json.dump(preprocessing_state, f)



### Serve the Chatbot with Flask

In [33]:
# Flask application object; the /chat route further down is registered on it.
app = Flask(__name__)

In [34]:
# Load the saved model from disk (this rebinds `model`). The tokenizer is not
# loaded here; it is restored from preprocessed_data.json in the cells below.
model = load_model("chatbot_model.h5")



In [37]:
# Read back the preprocessing settings written when the model was saved.
with open("preprocessed_data.json", "r", encoding="utf-8") as settings_file:
    data = json.loads(settings_file.read())

In [38]:
# "tokenizer_config" holds the value produced by tokenizer.to_json() at save
# time (a JSON string, not a dict), which is what tokenizer_from_json takes.
vocab_size, max_len = data["vocab_size"], data["max_len"]
tokenizer = tokenizer_from_json(data["tokenizer_config"])

In [39]:
# Build the reverse mapping from token index back to word.
reverse_word_index = {
    token_index: word for word, token_index in tokenizer.word_index.items()
}

In [40]:
@app.route("/chat", methods=["POST"])
def chat():
    """Answer one chat message with the model's predicted token sequence.

    Expects a JSON object body. Its optional "message" field (a string,
    defaulting to "") is tokenized, padded to ``max_len`` and fed to the model;
    the arg-max token at each position is mapped back to a word.

    Returns:
        200 with ``{"response": <text>}`` on success, or 400 with
        ``{"error": <reason>}`` when the body is not a JSON object or
        "message" is not a string.
    """
    # silent=True returns None instead of raising on a missing/invalid JSON
    # body, so malformed requests get an explicit 400 below.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    user_input = payload.get("message", "")
    if not isinstance(user_input, str):
        # The tokenizer only handles text; reject anything else up front
        # rather than failing inside it with a 500.
        return jsonify({"error": "'message' must be a string."}), 400

    seq = tokenizer.texts_to_sequences([user_input])
    padded = pad_sequences(seq, maxlen=max_len, padding="post")

    prediction = model.predict(padded)  # expected shape: (1, max_len, vocab_size)
    predicted_seq = np.argmax(prediction, axis=-1)[0]  # best token id per position

    # Map token ids back to words, skipping 0 (padding). Ids without a known
    # word are dropped so they do not leave stray spaces in the reply.
    candidate_words = (
        reverse_word_index.get(int(idx), "") for idx in predicted_seq if idx != 0
    )
    response = " ".join(word for word in candidate_words if word).strip()

    return jsonify({"response": response})


In [None]:
if __name__ == "__main__":
    # Run Flask in a background thread so the notebook stays usable.
    # Re-running this cell must not start a second server on the same port
    # (that fails with "Address already in use"), so only start a new thread
    # when no previous one from this cell is still alive.
    _previous_thread = globals().get("_flask_thread")
    if _previous_thread is None or not _previous_thread.is_alive():
        # use_reloader=False: the reloader cannot be used from a non-main thread.
        _flask_thread = Thread(
            target=lambda: app.run(debug=True, port=12000, use_reloader=False)
        )
        _flask_thread.start()

 * Serving Flask app '__main__'
 * Debug mode: on


Address already in use
Port 12000 is in use by another program. Either identify and stop that program, or start the server with a different port.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 351ms/step


INFO:werkzeug:127.0.0.1 - - [06/Apr/2025 13:34:29] "[35m[1mPOST /chat HTTP/1.1[0m" 500 -
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.12/site-packages/flask/app.py", line 1498, in __call__
    return self.wsgi_app(environ, start_response)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.12/site-packages/flask/app.py", line 1476, in wsgi_app
    response = self.handle_exception(e)
               ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.12/site-packages/flask/app.py", line 1473, in wsgi_app
    response = self.full_dispatch_request()
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.12/site-packages/flask/app.py", line 882, in full_dispatch_request
    rv = self.handle_user_exception(e)
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.12/site-packages/flask/app.py", line 880, in full_dispatch_request
    rv = self.dispatch_request()
         ^^^^^^^^^^^

In [46]:
# Step 1: the question to send through the model.
user_input = "Rekomendasikan tempat wisata di Makassar"

# Step 2: tokenize the question and pad it to max_len.
seq = tokenizer.texts_to_sequences([user_input])
padded = pad_sequences(seq, maxlen=max_len, padding="post")

# Step 3: run the model.
prediction = model.predict(padded)

# Step 4: take the highest-scoring token id at each position,
# e.g. [3, 45, 9, 0, 0, ...].
predicted_seq = prediction.argmax(axis=-1)[0]

# Step 5: convert token ids to words, skipping 0 (padding).
response_words = []
for token_id in predicted_seq:
    if token_id != 0:
        response_words.append(reverse_word_index.get(token_id, ""))
response = " ".join(response_words)

# Step 6: show the result.
print("Input pengguna:", user_input)
print("Prediksi token:", predicted_seq)
print("Respon chatbot:", response)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
Input pengguna: Rekomendasikan tempat wisata di Makassar
Prediksi token: [1066   13  971 5327   13   13    9    0    0    0    0    0    0    0
    0    0    0    0    0    0]
Respon chatbot: great rating setiya aneh rating rating harga
