### Load Model

In [11]:
from tensorflow.keras.models import load_model
import pandas as pd
import numpy as np
import string
import re

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# Fetch the NLTK resources this notebook relies on. Each call is a quick
# "already up-to-date" check when the package is present locally.
# 'stopwords' backs stopwords.words('english') in txt_preprocessing.
nltk.download('stopwords')
# 'punkt' provides the tokenizer models used by word_tokenize.
nltk.download('punkt')
# 'wordnet' is the lexical database behind WordNetLemmatizer.
nltk.download('wordnet')
# 'omw-1.4' (Open Multilingual Wordnet) — presumably needed by the installed
# NLTK version alongside 'wordnet'; TODO confirm it is still required.
nltk.download('omw-1.4')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [12]:
# Load the trained model from the "model_final" path, which is resolved
# relative to the notebook's working directory. The resulting `model_rnn`
# object is what the prediction cell calls `.predict` on.
model_rnn = load_model("model_final")

### Data Inference

In [13]:
# Load the inference data from "inference_set.csv" (path relative to the
# notebook's working directory). Later cells read its `text` column.
data_inf = pd.read_csv("inference_set.csv")
# Bare expression so the loaded frame is rendered as the cell output.
data_inf

Unnamed: 0,text
0,This is my first Yelp review. As is normally t...
1,Amazing food at a reasonable price. Service wa...
2,BLT Burger is the latest eatery addition to th...
3,I completely forgot to update and mention that...
4,After a so so first visit I took my wife for a...


### Preprocessing Data Inference

In [14]:
# Preprocessing function
_STOPWORDS_CACHE = {}


def _english_stopwords():
    """Return NLTK's English stopwords as a frozenset, reading the corpus only once."""
    if "english" not in _STOPWORDS_CACHE:
        _STOPWORDS_CACHE["english"] = frozenset(stopwords.words('english'))
    return _STOPWORDS_CACHE["english"]


def txt_preprocessing(datatext):
    """Normalise one raw text into a space-separated string of lemmas.

    Steps, in order: lowercase; strip links, literal backslash-n sequences,
    @mentions and #hashtags; replace every character that is not a letter,
    whitespace or apostrophe with a space; drop English stopwords; drop
    1-2 character tokens; strip punctuation; lemmatize each remaining token
    with WordNetLemmatizer.

    Parameters
    ----------
    datatext : str
        Raw text. Missing values (None / NaN, e.g. empty CSV cells) yield an
        empty string instead of raising; other non-string values are
        converted with ``str()`` first.

    Returns
    -------
    str
        The cleaned text; empty if nothing survives the filtering.
    """
    # Missing values would otherwise crash on .lower().
    if not isinstance(datatext, str):
        if pd.isna(datatext):
            return ""
        datatext = str(datatext)

    # Nothing to clean; also avoids touching the NLTK corpora for blank input.
    if not datatext.strip():
        return ""

    # Lowercase the text
    datatext = datatext.lower()

    # Remove links. The dot after "www" is escaped so that ordinary words
    # containing "www" (e.g. "awww") no longer swallow the word that follows.
    # NOTE(review): this changes the output for such texts compared with the
    # unescaped pattern; confirm it matches the preprocessing used in training.
    datatext = re.sub(r"http\S+", " ", datatext)
    datatext = re.sub(r"www\.\S+", " ", datatext)

    # Remove literal backslash + "n" sequences. Real newline characters are
    # whitespace and are collapsed by the split/join below.
    datatext = re.sub(r"\\n", " ", datatext)

    # Remove mentions (@text)
    datatext = re.sub(r"@[A-Za-z0-9_]+", " ", datatext)

    # Remove hashtags (#text)
    datatext = re.sub(r"#[A-Za-z0-9_]+", " ", datatext)

    # Replace everything except letters, whitespace and apostrophes.
    # Raw string: "\s" in a plain literal is an invalid escape sequence.
    datatext = re.sub(r"[^A-Za-z\s']", " ", datatext)

    # Collapse runs of whitespace
    datatext = ' '.join(datatext.split())

    # Remove stopwords (set membership instead of a per-token list scan)
    stop_set = _english_stopwords()
    datatext = ' '.join(
        word for word in word_tokenize(datatext) if word not in stop_set
    )

    # Remove short tokens (1-2 characters) together with any non-word
    # characters immediately preceding them
    datatext = re.sub(r'\W*\b\w{1,2}\b', ' ', datatext)

    # Remove punctuation
    datatext = datatext.translate(str.maketrans('', '', string.punctuation))

    # Collapse runs of whitespace
    datatext = ' '.join(datatext.split())

    # Lemmatize each token
    lem = WordNetLemmatizer()
    return ' '.join(lem.lemmatize(token) for token in word_tokenize(datatext))

In [15]:
# Run every entry of the `text` column through txt_preprocessing and store
# the cleaned strings back in the same column, then display the frame.
data_inf['text'] = data_inf['text'].apply(txt_preprocessing)
data_inf

Unnamed: 0,text
0,first yelp review normally case first review c...
1,amazing food reasonable price service great st...
2,blt burger latest eatery addition mirage since...
3,completely forgot update mention spent next da...
4,first visit took wife quick lunch pressure ord...


### Predict Data Inference

In [16]:
# Predict with the model: take the raw model output per row, pick the index
# of the largest value along axis 1, and label index 1 as "Positive" and any
# other index as "Negative".
raw_scores = model_rnn.predict(data_inf["text"])
class_index = np.argmax(raw_scores, axis=1)
y_pred = np.where(class_index == 1, "Positive", "Negative")



In [17]:
# Wrap the predicted labels in a single-column frame so they can be placed
# next to the inference data, then display it.
y_pred_df = pd.DataFrame({"sentiment_pred": y_pred})
y_pred_df

Unnamed: 0,sentiment_pred
0,Negative
1,Positive
2,Negative
3,Negative
4,Negative


In [18]:
# Put the predictions side by side with the inference data. The two frames
# are aligned on their index, so both must share the same row labels.
frames_side_by_side = [y_pred_df, data_inf]
data_inf_pred_df = pd.concat(frames_side_by_side, axis="columns")
data_inf_pred_df

Unnamed: 0,sentiment_pred,text
0,Negative,first yelp review normally case first review c...
1,Positive,amazing food reasonable price service great st...
2,Negative,blt burger latest eatery addition mirage since...
3,Negative,completely forgot update mention spent next da...
4,Negative,first visit took wife quick lunch pressure ord...


Dari hasil prediksi data inference, satu dokumen diprediksi termasuk sentimen positif dan empat dokumen diprediksi termasuk sentimen negatif.