In [483]:
# import library
import string
import re
import numpy as np
import pandas as pd
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from util import JSONParser
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [484]:
# Load data: location of the intents JSON file (relative to the notebook's working directory)
path = "dataset/intents.json"

# Create a JSONParser object and parse the intents.json data.
# jp is kept around: the chat loop later calls jp.get_response() on it.
jp = JSONParser()
jp.parse(path)

# Store the resulting dataframe in the variable df
df = jp.get_dataframe()

[INFO] Data JSON converted to DataFrame with shape : (1003, 2)


In [485]:
# Display the parsed dataframe (columns text_input and intents)
df

Unnamed: 0,text_input,intents
0,Hai,menyapa
1,Halo,menyapa
2,Hei,menyapa
3,Oi,menyapa
4,Woy,menyapa
...,...,...
998,Kopi yang paling mahal dijual disini ada gak sih?,kopi_mahal
999,jenis kopi yang harganya paling mahal ada gak?...,kopi_mahal
1000,Punya kopi yang harganya paling mahal kan,kopi_mahal
1001,Punya kopi termahal kan? kasih tau aku bisa?,kopi_mahal


In [486]:
# Take entry 200 of the text_input column as a sample to walk through
# the preprocessing steps one at a time.
sample_chat = df["text_input"][200]
sample_chat


'Apa bisa batalin pesanan?'

In [487]:
# Case folding: lowercase the sample sentence
chat_lower = str.lower(sample_chat)
chat_lower

'apa bisa batalin pesanan?'

In [488]:
# Drop every ASCII punctuation character from the lowercased sample.
# The translation table maps each character of string.punctuation to None.
punct_table = str.maketrans("", "", string.punctuation)
chat_punct = chat_lower.translate(punct_table)
chat_punct

'apa bisa batalin pesanan'

In [489]:
# Replace every character that is not an ASCII letter, whitespace or an
# apostrophe with a space (this is what removes digits).
# The pattern is a raw string so that \s is passed to the regex engine as-is
# instead of being an invalid escape sequence in a normal string literal.
chat_punct = re.sub(r"[^A-Za-z\s']", " ", chat_punct)
chat_punct

'apa bisa batalin pesanan'

In [490]:
# Trim leading and trailing whitespace from the cleaned sample
chat_punct = chat_punct.strip()
chat_punct

'apa bisa batalin pesanan'

In [491]:
# Build a Sastrawi stemmer through its factory
stemmer_factory = StemmerFactory()
stemmer = stemmer_factory.create_stemmer()

# Stem the cleaned sample sentence and show the result
output = stemmer.stem(chat_punct)
output

'apa bisa batalin pesan'

In [492]:
# Build a Sastrawi stop-word remover through its factory
stopword_factory = StopWordRemoverFactory()
stopword = stopword_factory.create_stop_word_remover()

# Remove stop words from the stemmed sample and show the result
output_stopword = stopword.remove(output)
output_stopword

'batalin pesan'

In [498]:
# Text preprocessing shared by the training data and the live chat input.
# The translation table and the compiled pattern are built once here instead
# of on every call.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)
# Raw string: "\s" inside a normal string literal is an invalid escape sequence.
_NON_LETTER_RE = re.compile(r"[^A-Za-z\s']")
_default_stemmer = None  # created lazily by _get_default_stemmer()


def _get_default_stemmer():
    """Return a shared Sastrawi stemmer, creating it on first use."""
    global _default_stemmer
    if _default_stemmer is None:
        _default_stemmer = StemmerFactory().create_stemmer()
    return _default_stemmer


def document_processing(document, stemmer=None):
    """Normalize one text document for the intent classifier.

    Steps, in order: lowercase, remove ASCII punctuation, replace every
    remaining character that is not an ASCII letter / whitespace / apostrophe
    with a space, strip surrounding whitespace, then stem.

    Parameters
    ----------
    document : str
        Raw text (one utterance).
    stemmer : object, optional
        Any object exposing ``stem(text) -> str``. When omitted, a single
        Sastrawi stemmer is created on first use and reused afterwards, so the
        stemmer is no longer rebuilt for every document.

    Returns
    -------
    str
        The cleaned and stemmed text.
    """
    # Case folding
    document = document.lower()

    # Remove punctuation
    document = document.translate(_PUNCTUATION_TABLE)

    # Replace digits and any other non-letter character with a space
    document = _NON_LETTER_RE.sub(" ", document)

    # Trim leading/trailing whitespace
    document = document.strip()

    # Stemming
    if stemmer is None:
        stemmer = _get_default_stemmer()
    return stemmer.stem(document)

In [499]:
# Separate the input text (features) from the intent labels (target)
X = df["text_input"]
y = df["intents"]

In [500]:
# Run the preprocessing function over every utterance in X
X_proc = X.apply(document_processing)

In [501]:
# Fit a standalone bag-of-words vocabulary on the whole preprocessed corpus.
# NOTE(review): in the cells visible here the models are pipelines that create
# their own CountVectorizer and are fitted on X_train only, so this vect does
# not appear to feed into training — confirm whether it is still needed.
vect = CountVectorizer()
vect.fit(X_proc)

In [502]:
# Vectorize the whole preprocessed corpus with the standalone vectorizer.
# NOTE(review): X_vect is not referenced by the later cells visible here — confirm it is needed.
X_vect = vect.transform(X_proc)

In [503]:
# Hold out 20% of the preprocessed utterances as a test set; the remaining
# 80% is the training set. The split is seeded so it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_proc,
    y,
    test_size=0.2,
    random_state=4,
)

# Report the sizes of the two splits
train_sizes = (len(X_train), len(y_train))
print(f"Train-Set shape      : {train_sizes}")
print(f"Test-Set shape       : {X_test.shape}")

Train-Set shape      : (802, 802)
Test-Set shape       : (201,)


In [504]:
# Naive Bayes pipeline: bag-of-words counts followed by a multinomial NB classifier
nb_steps = (CountVectorizer(), MultinomialNB())
nb = make_pipeline(*nb_steps)

# Fit vectorizer and classifier together on the training split
nb.fit(X_train, y_train)

In [505]:
# Random Forest pipeline: bag-of-words counts followed by a RandomForestClassifier.
# random_state is fixed (same seed as the train/test split) because the forest
# is built with randomness; without a seed the fitted model and the reported
# accuracy change on every run.
rf = make_pipeline(CountVectorizer(),
                   RandomForestClassifier(random_state=4))

# Training
rf.fit(X_train, y_train)

In [506]:
# Decision Tree pipeline: bag-of-words counts followed by a DecisionTreeClassifier.
# random_state is fixed (same seed as the train/test split) so that the fitted
# tree and the reported accuracy are reproducible across runs.
dt = make_pipeline(CountVectorizer(),
                   DecisionTreeClassifier(random_state=4))

# Training
dt.fit(X_train, y_train)

In [507]:
# Evaluate the Naive Bayes pipeline on the training split and on the held-out test split
y_train_pred_nb = nb.predict(X_train)
y_test_pred_nb = nb.predict(X_test)
# accuracy_score takes (y_true, y_pred) in that order
print("Train-Set Multinomial Naive Bayes model accuracy(in %):", metrics.accuracy_score(y_train, y_train_pred_nb)*100)
print("Test-Set Multinomial Naive Bayes model accuracy(in %) :", metrics.accuracy_score(y_test, y_test_pred_nb)*100)

Train-Set Multinomial Naive Bayes model accuracy(in %): 97.2568578553616
Test-Set Multinomial Naive Bayes model accuracy(in %) : 89.05472636815921


In [508]:
# Evaluate the Decision Tree pipeline on the training split and on the held-out test split
y_train_pred_dt = dt.predict(X_train)
y_test_pred_dt = dt.predict(X_test)
# The labels name the model actually being evaluated (they previously said
# "Multinomial Naive Bayes"); accuracy_score takes (y_true, y_pred).
print("Train-Set Decision Tree model accuracy(in %):", metrics.accuracy_score(y_train, y_train_pred_dt)*100)
print("Test-Set Decision Tree model accuracy(in %) :", metrics.accuracy_score(y_test, y_test_pred_dt)*100)

Train-Set Multinomial Naive Bayes model accuracy(in %): 99.87531172069825
Test-Set Multinomial Naive Bayes model accuracy(in %) : 83.5820895522388


In [509]:
# Evaluate the Random Forest pipeline on the training split and on the held-out test split
y_train_pred_rf = rf.predict(X_train)
y_test_pred_rf = rf.predict(X_test)
# The labels name the model actually being evaluated (they previously said
# "Multinomial Naive Bayes"); accuracy_score takes (y_true, y_pred).
print("Train-Set Random Forest model accuracy(in %):", metrics.accuracy_score(y_train, y_train_pred_rf)*100)
print("Test-Set Random Forest model accuracy(in %) :", metrics.accuracy_score(y_test, y_test_pred_rf)*100)

Train-Set Multinomial Naive Bayes model accuracy(in %): 99.87531172069825
Test-Set Multinomial Naive Bayes model accuracy(in %) : 86.06965174129353


In [516]:
# Interactive chat loop: classify each user message with the Naive Bayes
# pipeline and answer with the response that jp provides for the predicted intent.
CONFIDENCE_THRESHOLD = 0.20  # below this probability the bot says it does not understand
CLOSING_INTENT = 'menutup'   # predicted intent that ends the conversation

print("Anda Terhubung dengan chatbot Kami")
while True:
    # Read the user's message; leave the loop cleanly if the input stream is
    # closed or the cell is interrupted instead of surfacing a traceback.
    try:
        chat = input("Anda : ")
    except (EOFError, KeyboardInterrupt):
        break
    # Apply the same preprocessing that was used on the training data
    chat_processed = document_processing(chat)
    # Class probabilities for this single message
    probabilities = nb.predict_proba([chat_processed])[0]
    # A single argmax yields both the winning class index and its probability
    max_idx = np.argmax(probabilities)
    max_prob = probabilities[max_idx]
    predicted_intent = nb.classes_[max_idx]
    print("Saya :",chat)
    # Fallback when the model is not confident, or when nothing is left of the
    # message after preprocessing (the prediction would then not depend on the
    # message at all).
    if not chat_processed or max_prob < CONFIDENCE_THRESHOLD:
        print("Bot  : Maaf Kak, aku ga ngerti")
        # Do not fall through to the closing check: a low-confidence guess of
        # the closing intent must not end the conversation.
        continue
    print(f"Bot  : {jp.get_response(predicted_intent)}\n")
    # End the conversation only on a confident closing intent
    if predicted_intent == CLOSING_INTENT:
        break
    

Anda Terhubung dengan chatbot Kami
Saya : selamat sore
Bot  : Hai kak, terima kasih sudah menghubungi kami! apakah ada yang bisa saya bantu?

Saya : kamu siapa? kenalan dulu dong
Bot  : Salam kenal kak, aku Gitcoff

Saya : ooo gitcoff, kamu disini tugasnya ngapain ya?
Bot  : Tenang kak, disini aku bisa bantu untuk jawab pertanyaan kakak seputar produk, pemesanan, pembayaran, dan lain-lain ya. Kalau aku masih belum paham sama pertanyaan kakak, nanti bisa disambungkan langsung ke CS kami kok

Saya : bagus lah kalo gitu, oiya ini nama tokonya apa sih?
Bot  : Git Coffee menjual berbagai macam kopi siap saji produksi sendiri nih kak. Ada kopi arabica, robusta, luwak, latte, macchiato, dan ada juga kopi dengan berbagai rasa lainnya. Silakan melihat katalog kami yaa untuk lebih lengkapnya di www.Git Coffee.com (:

Saya : banyak ya, kalau aku mau pesen kopinya gemana tuh?
Bot  : Kakak bisa memesan produk pada website resmi kami di www.web.com, untuk cara pemesanannya bisa kakak lihat pada www.