In [5]:
import json
import string
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf

from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory

In [6]:
# Sastrawi stemmer, used by clean_text to stem every kept word
stemmer = StemmerFactory().create_stemmer()

# Sastrawi stop-word collection, used by clean_text to drop stop words
factory = StopWordRemoverFactory()
stopwords = factory.get_stop_words()

In [7]:
# Load the intent definitions from JSON.
# The context manager closes the file handle as soon as parsing is done
# instead of leaving it open for the lifetime of the kernel; the encoding is
# pinned to UTF-8 so the result does not depend on the platform default.
with open('intent/intent.json', 'r', encoding='utf-8') as f:
    intent_json = json.load(f)

In [8]:
# Lexicon file with a 'slang' column and a 'formal' column
slang = pd.read_csv('lexicon/slang ke semi baku.csv')

# Lookup table: slang word -> its 'formal' replacement (row by row;
# if a slang word appears more than once, the last row wins)
slang_replace = dict(zip(slang['slang'], slang['formal']))

In [9]:
# Lexicon file with a 'slang' column and a 'baku' (standard form) column
baku = pd.read_csv('lexicon/slang ke baku.csv')

# Lookup table: non-standard word -> its 'baku' replacement (row by row;
# if a word appears more than once, the last row wins)
std_word_replace = dict(zip(baku['slang'], baku['baku']))

In [10]:
# Load the saved Keras model from 'saved_model/model'; bot_response calls its
# predict() on the cleaned text and takes the argmax of the output.
model = tf.keras.models.load_model('saved_model/model')

In [11]:
# Load the pickled encoder; bot_response indexes its `classes_` attribute with
# the argmax of the model output to turn a prediction into an intent label.
# NOTE(review): pickle.load can execute arbitrary code - only unpickle files
# from a trusted source.
with open("saved_model/encoder.pkl", "rb") as f:
    le = pickle.load(f)

In [12]:
# Rebuild the TextVectorization layer from its pickled 'config' and 'weights'.
# The file is opened in a context manager so the handle is closed right after
# unpickling instead of being leaked.
# NOTE(review): pickle.load can execute arbitrary code - only unpickle files
# from a trusted source.
with open("saved_model/textvect.pkl", "rb") as vect_file:
    from_disk = pickle.load(vect_file)
textvect = tf.keras.layers.TextVectorization.from_config(from_disk['config'])
# You have to call `adapt` with some dummy data (BUG in Keras)
textvect.adapt(tf.data.Dataset.from_tensor_slices(["xyz"]))
textvect.set_weights(from_disk['weights'])

In [13]:
# Create text cleaning function
def clean_text(text):
    """Normalise raw user text into the cleaned string the model is fed.

    Steps, in order:
      1. lowercase the text and strip every character in ``string.punctuation``;
      2. replace each word found in ``std_word_replace`` (checked first) or
         ``slang_replace`` with its replacement, which may expand to several
         words; words found in neither lexicon are kept unchanged;
      3. drop stop words, map each remaining word through
         ``std_word_replace`` once more, and stem it.

    Args:
        text (str): raw input sentence.

    Returns:
        str: the processed words joined by single spaces (``''`` when
        nothing is left).
    """
    # Lowercase, then remove punctuation
    text = text.lower().translate(str.maketrans('', '', string.punctuation))

    normalized = []
    for kata in text.split():
        # Look the word up directly in each lexicon. The previous
        # `slang_replace|std_word_replace` built a merged copy of both
        # dictionaries for every single word (and needs Python 3.9+);
        # checking them in the same precedence order gives the same result.
        if kata in std_word_replace:
            # Replace non-formal word with standard word(s)
            normalized += std_word_replace[kata].split()
        elif kata in slang_replace:
            # Replace slang with its formal word(s)
            normalized += slang_replace[kata].split()
        else:
            # Keep word not in slang or standard word lexicon
            normalized.append(kata)

    # Build the stop-word lookup once per call so each membership test is a
    # hash lookup rather than a scan of the whole collection.
    stopword_set = set(stopwords)

    # The second pass through std_word_replace is kept from the original
    # implementation - presumably so a slang replacement that is itself
    # non-standard still ends up in standard form (TODO confirm).
    return ' '.join(
        stemmer.stem(std_word_replace.get(word, word))
        for word in normalized
        if word not in stopword_set
    )

In [14]:
def bot_response(text):
    """Print the chatbot reply for ``text``.

    The text is cleaned with ``clean_text`` and vectorised with ``textvect``.
    If no token index is greater than 1 (with the default TextVectorization
    settings index 0 is padding and 1 is the out-of-vocabulary token, so this
    means no known word was found - verify against the saved layer config),
    the fallback apology is used. Otherwise the model predicts an intent, the
    label is read from ``le.classes_`` via argmax, and the matching entry of
    ``intent_json['intents']`` supplies the candidate responses. One candidate
    is picked at random with ``np.random.choice`` and printed.

    Args:
        text (str): raw user message.

    Returns:
        None: the reply is printed, not returned.
    """
    fallback = ['Maaf, saya tidak mengerti']

    text = clean_text(text)

    token_ids = textvect(text).numpy()
    # `.max()` raises ValueError on a zero-size array, so check the size first.
    if token_ids.size == 0 or token_ids.max() <= 1:
        # Nothing recognisable in the input: no need to run the model at all.
        response = fallback
    else:
        pred = model.predict([text])
        res = le.classes_[pred.argmax()]

        # Default to the fallback so a predicted label that has no entry in
        # the intent file no longer leaves `response` unbound.
        response = fallback
        for label_pred in intent_json['intents']:
            if label_pred['intent'] == res:
                response = label_pred['response']

    print(np.random.choice(response))

In [15]:
# Demo: for this query the recorded output below is the fallback apology
bot_response('laper pengen makan jeruk')

Maaf, saya tidak mengerti


In [16]:
# Demo: for this query the recorded output below is the passport-photo
# ("pas foto") requirements response from the intent file
bot_response('pas foto hilang gimana ya')

Pas foto merupakan dokumen wajib pendaftaran ya, Kak. Berikut ketentuan pas foto yang harus dipenuhi:
- Background berwarna biru dengan pakaian formal (kemeja dan/atau jas)
- Foto berukuran 3x4 dan berwarna
- Disarankan untuk menggunakan foto terbaru
- Upload foto di https://rekrutmen.fiktif.id/dokumen dengan ukuran file tidak lebih dari 1 MB dengan format file jpg/png/jpeg
