In [24]:
import numpy as np 
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding,SimpleRNN,Dense,TimeDistributed,LSTM
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model
from flask import Flask, request, jsonify
import pandas as pd 
import os
import nltk
from threading import Thread
import requests

INFO:werkzeug:127.0.0.1 - - [04/Apr/2025 08:41:10] "GET /chat HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Apr/2025 08:41:28] "GET /chat HTTP/1.1" 200 -


In [3]:
# Fetch NLTK's 'punkt' data package; the call's return value becomes the cell output.
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/macbookpro2019/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

### Load Dataset 

In [4]:
# Read the three source CSV files (paths are relative to the working directory),
# in the same order as the names on the left-hand side.
tourism_info, tourism_rating, users = (
    pd.read_csv(csv_name)
    for csv_name in ("tourism_with_id.csv", "tourism_rating.csv", "user.csv")
)
# Last expression of the cell: renders the users table as the cell output.
users

Unnamed: 0,User_Id,Location,Age
0,1,"Semarang, Jawa Tengah",20
1,2,"Bekasi, Jawa Barat",21
2,3,"Cirebon, Jawa Barat",23
3,4,"Bekasi, Jawa Barat",21
4,5,"Lampung, Sumatera Selatan",20
...,...,...,...
295,296,"Lampung, Sumatera Selatan",31
296,297,"Palembang, Sumatera Selatan",39
297,298,"Bogor, Jawa Barat",38
298,299,"Sragen, Jawa Tengah",27


### Merge Tourism, Rating, and User Datasets

In [5]:
# Join the datasets on their shared keys: ratings are attached to places via
# Place_Id, then user attributes are attached via User_Id (both left joins).
merged_data = (
    tourism_info
    .merge(tourism_rating, on="Place_Id", how="left")
    .merge(users, on="User_Id", how="left")
)

### Select and Rename Columns

In [6]:

# Keep only the two relevant columns, drop rows with missing values, cap the
# result at the first 1000 rows, and rename the columns to their
# question/answer roles.
# NOTE(review): merged_data comes from left joins, so a place may be repeated
# once per matching rating row and these 1000 rows may contain duplicate
# pairs; confirm whether a drop_duplicates() step is wanted here.
dataset = (
    merged_data[['Place_Name', 'Description']]
    .dropna()
    .head(1000)
    .rename(columns={'Place_Name': 'question', 'Description': 'answer'})
)

### Tokenisasi

In [7]:
# Pull the question and answer columns out as plain Python lists.
questions, answers = (
    dataset[column].tolist() for column in ("question", "answer")
)

###  Inisialisasi tokenizer

In [8]:
# Tokenizer setup.
# vocab_size is the single source of truth for the vocabulary size: the same
# name sizes the one-hot targets and the model's embedding/output layers, so
# the tokenizer's word cap is tied to it instead of a separate hard-coded
# 1000 (which left half of the 2000 output classes unreachable).
# NOTE(review): a 'chatbot_lstm.h5' saved before this change was trained with
# token ids below 1000 only; delete it so the model is retrained.
vocab_size = 2000
tokenizer = Tokenizer(num_words=vocab_size, oov_token="<OOV>")
# Build the word index from both the questions and the answers.
tokenizer.fit_on_texts(questions + answers)

### Konversi teks ke urutan token

In [9]:
# Encode both text lists as sequences of integer token ids
# (X from the questions, y from the answers).
X, y = map(tokenizer.texts_to_sequences, (questions, answers))

In [10]:
# Length (in tokens) of the longest sequence in X; used as the common padded length.
max_sequence_length = max(map(len, X))

In [11]:
# Pad every sequence to the same length (zeros are appended at the end).
X = pad_sequences(X, maxlen=max_sequence_length, padding='post')
# Answers longer than max_sequence_length are cut down to it. pad_sequences
# truncates from the front by default, which would keep only the tail of each
# answer; truncating='post' keeps the opening tokens instead.
y = pad_sequences(y, maxlen=max_sequence_length, padding='post', truncating='post')

### Konversi output menjadi one-hot encoding

In [12]:
# One-hot encode y over vocab_size classes.
# The identity matrix is built once and indexed with the whole id array in a
# single vectorised step (previously it was rebuilt for every sequence inside
# a Python loop). float32 halves the memory of the default float64 result.
y = np.eye(vocab_size, dtype=np.float32)[np.asarray(y)]

### Parameter Model

In [13]:
# Model hyperparameters: embedding vector width and number of LSTM units.
embedding_dim, lstm_units = 64, 128

In [14]:

def build_model(vocab_size, embedding_dim, lstm_units, max_sequence_length):
    """Build and compile the per-timestep token-prediction LSTM model.

    Args:
        vocab_size: Number of token ids; sizes both the embedding table and
            the softmax output at every timestep.
        embedding_dim: Width of each embedding vector.
        lstm_units: Number of units in the LSTM layer.
        max_sequence_length: Fixed number of timesteps in each input sequence.

    Returns:
        A compiled Sequential model (categorical cross-entropy loss, Adam
        optimizer, accuracy metric).
    """
    model = Sequential([
        # The sequence length is declared with an explicit Input instead of
        # Embedding's `input_length` argument, which current Keras releases
        # deprecate.
        tf.keras.Input(shape=(max_sequence_length,), dtype='int32'),
        Embedding(vocab_size, embedding_dim),
        # return_sequences=True keeps one output vector per timestep.
        LSTM(lstm_units, return_sequences=True),
        # A softmax over the vocabulary is applied at every timestep.
        TimeDistributed(Dense(vocab_size, activation='softmax')),
    ])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [15]:
def train_and_save_model():
    """Train a new model on the notebook-level X / y arrays and save it.

    The network is built from the notebook-level hyperparameters, fitted for
    20 epochs with a batch size of 4, and written to 'chatbot_lstm.h5'.
    Progress messages are printed before and after each stage.

    Returns:
        The trained model.
    """
    net = build_model(
        vocab_size=vocab_size,
        embedding_dim=embedding_dim,
        lstm_units=lstm_units,
        max_sequence_length=max_sequence_length,
    )

    print("Starting model training...")
    net.fit(X, y, batch_size=4, epochs=20, verbose=1)
    print("Model training completed.")

    net.save('chatbot_lstm.h5')
    print("Model saved as chatbot_lstm.h5.")
    return net


In [16]:
# Check whether a saved model file already exists; train (and save) one only if it does not.
# The model returned by train_and_save_model() is not kept here.
if not os.path.exists('chatbot_lstm.h5'):
    train_and_save_model()

In [17]:
# Load the saved model from disk; `model` is what the /chat route uses for prediction.
model = load_model('chatbot_lstm.h5')



In [18]:
# Create the Flask application object on which the /chat route is registered.
app = Flask(__name__)

In [19]:
# Drop any previously registered view functions for these endpoints;
# pop(..., None) does nothing when the endpoint is not registered.
app.view_functions.pop('predict', None)  # Remove if already present
app.view_functions.pop('chat', None)     # Remove if already present


In [20]:
@app.route('/chat', methods=['GET', 'POST'])
def chat():
    """Chat endpoint.

    Any non-POST request routed here (GET, and the HEAD that Flask adds for
    GET routes) receives a fixed welcome message.

    POST expects a JSON object with an optional string field 'text'. The text
    is tokenised, padded to the training sequence length, and run through the
    model; the most likely token at each timestep is mapped back to a word
    and the words are joined into the reply.

    Returns:
        A JSON response {'response': <str>}, or {'error': <str>} with status
        400 when the POST body is not a JSON object with a string 'text'.
    """
    if request.method != 'POST':
        # Previously only GET was handled here, so a HEAD request matched no
        # branch and the view returned None.
        return jsonify({'response': 'Selamat datang'})

    # silent=True yields None instead of raising on a missing/invalid JSON
    # body, so malformed requests get one consistent JSON error below.
    payload = request.get_json(silent=True)
    user_input = payload.get('text', '') if isinstance(payload, dict) else None
    if not isinstance(user_input, str):
        return jsonify({'error': "Request body must be a JSON object with a string 'text' field."}), 400

    seq = tokenizer.texts_to_sequences([user_input])
    seq = pad_sequences(seq, maxlen=X.shape[1], padding='post')

    prediction = model.predict(seq)
    # argmax over the vocabulary axis gives one token id per timestep; [0]
    # selects the single sequence in the batch. This is an array, so it cannot
    # be used as one dictionary key (the earlier lookup raised TypeError).
    token_ids = np.argmax(prediction, axis=-1)[0]

    words = []
    for token_id in token_ids:
        # Ids with no entry in index_word (e.g. the padding id 0) are skipped,
        # as is the out-of-vocabulary placeholder token.
        word = tokenizer.index_word.get(int(token_id))
        if word is not None and word != tokenizer.oov_token:
            words.append(word)

    # Fall back to the apology message when no usable word was predicted.
    response = ' '.join(words) or "Maaf, saya tidak mengerti."

    return jsonify({'response': response})


In [None]:
if __name__ == '__main__':
    # Start the Flask server on port 12000 in a separate thread, in debug mode
    # with the reloader switched off.
    Thread(
        target=app.run,
        kwargs={'debug': True, 'port': 12000, 'use_reloader': False},
    ).start()

 * Serving Flask app '__main__'


 * Debug mode: on


 * Running on http://127.0.0.1:12000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [04/Apr/2025 08:38:13] "[33mGET / HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [04/Apr/2025 08:38:14] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [04/Apr/2025 08:38:16] "[33mGET / HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [04/Apr/2025 08:38:20] "GET /chat HTTP/1.1" 200 -
