# **Step 3: Sentiment Classification (Inference)**

## **1. Import Libraries**

In [1]:
#Import some required packages and libraries
import pickle
import numpy as np
import pandas as pd
import re
import string
import nltk
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
!pip install wordcloud sastrawi
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!




## **2. Load Pre-trained Models & Tokenizers**

In [2]:
# Load every artifact needed for inference. All paths are relative, so the
# files are resolved against the notebook's current working directory.
# SECURITY NOTE: pickle.load can execute arbitrary code while deserializing;
# only load .pkl files that come from a trusted source.

# Load Pre-trained LSTM model
lstm_model = load_model("lstm_sentiment_model.keras")

# Load Pre-trained SVM model
with open("svm_model.pkl", "rb") as f:
    svm_model = pickle.load(f)

# Load Pre-trained Logistic Regression model
with open("logreg_model.pkl", "rb") as f:
    logreg_model = pickle.load(f)

# Load TF-IDF Vectorizer
# NOTE(review): presumably the vectorizer fitted alongside the SVM / Logistic
# Regression models -- confirm against the training notebook.
with open("tfidf_vectorizer.pkl", "rb") as f:
    tfidf = pickle.load(f)

# Load Tokenizer for LSTM
# NOTE(review): presumably the tokenizer fitted alongside the LSTM model --
# confirm against the training notebook.
with open("tokenizer_lstm.pkl", "rb") as f:
    tokenizer = pickle.load(f)

  saveable.load_own_variables(weights_store.get(inner_path))


## **3. Text Preprocessing**

In [3]:
#Clean text for sentiment analysis (Cleaning Text)
def cleanText(text):
    """Normalize a raw review string before slang replacement and tokenization.

    Steps, applied in this order:
      1. lowercase the text;
      2. drop @mentions, #hashtags and anything starting with "http";
      3. drop every character that is not a word character, whitespace or a
         comma (this removes emojis, but also most punctuation);
      4. drop digit runs;
      5. turn newlines into spaces;
      6. drop any remaining ASCII punctuation (including commas and "_");
      7. strip leading/trailing space characters.

    Removed characters are deleted, not replaced by a space, so words that
    were separated only by punctuation end up glued together.

    Args:
        text: the raw review as a str.

    Returns:
        The cleaned str. Interior runs of spaces are not collapsed.
    """
    # (pattern, flags) pairs; each match is deleted outright.
    removal_steps = (
        (r'@[A-Za-z0-9]+', 0),          # mentions (@username)
        (r'#[A-Za-z0-9]+', 0),          # hashtags (#hashtag)
        (r"http\S+", 0),                # URLs (https://...)
        (r'[^\w\s,]', re.UNICODE),      # emojis and other symbols
        (r'\d+', 0),                    # numbers
    )

    cleaned = text.lower()
    for pattern, flags in removal_steps:
        cleaned = re.sub(pattern, '', cleaned, flags=flags)

    cleaned = cleaned.replace('\n', ' ')
    punctuation_table = str.maketrans('', '', string.punctuation)
    cleaned = cleaned.translate(punctuation_table)
    return cleaned.strip(' ')

In [4]:
#Split the text into individual word tokens (Tokenization)
def tokenText(text):
    """Tokenize a string into a list of word tokens via NLTK's word_tokenize.

    Args:
        text: the string to tokenize.

    Returns:
        Whatever word_tokenize returns for ``text`` (a list of tokens).
    """
    return word_tokenize(text)

In [5]:
#Dictionary of slang words and their standard equivalents
# Notes for maintainers (based on how replaceSlang and the pipeline use this table):
# - replaceSlang looks up whitespace-split tokens, so keys containing a space
#   ("very good", "gagal load") can never match.
# - In the pipeline, cleanText runs before replaceSlang and strips punctuation
#   and digits, so the "@" and "coba2" keys cannot match there either.
# - Replacement is a single pass: a value is not looked up again, so e.g.
#   "err" becomes "error" and is not further mapped to "kesalahan".
# - "lah" and "sih" map to the empty string, i.e. the token is blanked out.
slangwords = {
    "@": "di", "abis": "habis", "masi": "masih", "bgt": "banget", "maks": "maksimal",
    "tp": "tapi", "jd": "jadi", "krn": "karena", "trs": "terus", "good": "bagus",
    "dmn": "dimana", "hrs": "harus", "sy": "saya", "sm": "sama", "very good": "mantap",
    "utk": "untuk", "sbg": "sebagai", "blm": "belum", "udh": "sudah", "sdh": "sudah",
    "skrg": "sekarang", "dpt": "dapat", "tdk": "tidak", "bs": "bisa", "gk": "tidak",
    "dr": "dari", "dg": "dengan", "aja": "saja", "smua": "semua", "ngk": "tidak",
    "apk": "aplikasi", "dev": "developer", "bug": "kesalahan", "jlk": "jelek",
    "crash": "gagal", "sgt": "sangat", "fitur": "fungsi", "kcw": "kecewa", "g": "tidak",
    "err": "error", "eror": "error", "lag": "lambat", "bgs": "bagus", "gagal load": "gagal memuat",
    "ngebug": "bermasalah", "lemot": "lambat", "bagu": "bagus", "gabisa": "tidak bisa",
    "kehapus": "terhapus", "ribet": "sulit", "males": "tidak mau", "terimakasih": "terima kasih",
    "gampang": "mudah", "cepet": "cepat", "error": "kesalahan", "ngga": "tidak",
    "coba2": "mencoba", "cape": "capek", "drpd": "daripada", "ampun": "parah",
    "parah": "buruk", "mantep": "mantap", "makasih": "terima kasih", "gpp": "tidak apa-apa",
    "bbrp": "beberapa", "syg": "sayang", "dmna": "dimana", "lg": "lagi",
    "stuck": "macet", "gmn": "gimana", "kliatan": "kelihatan", "ampas": "buruk",
    "gajelas": "tidak jelas", "gaje": "tidak jelas", "kalo": "kalau", "td": "tadi",
    "gt": "gitu", "gitu": "begitu", "org": "orang", "blg": "bilang", "tlg": "tolong",
    "ak": "aku", "gw": "saya", "loe": "kamu", "lo": "kamu", "gua": "saya",
    "bro": "saudara", "sis": "saudari", "kmrn": "kemarin", "br": "baru",
    "btw": "ngomong-ngomong", "tq": "terima kasih", "kpn": "kapan",
    "knp": "kenapa", "bkn": "bukan", "aneh": "tidak biasa", "cmn": "cuman",
    "cm": "cuma", "kyk": "seperti", "krna": "karena", "yg": "yang",
    "lah": "", "sih": "", "jgn": "jangan", "uda": "sudah", "gws": "sehat selalu",
    "maap": "maaf", "ajg": "anjing", "anjir": "anjing", "mantul": "mantap betul",
    "cmiiw": "correct me if I'm wrong", "afk": "away from keyboard",
    "plis": "tolong", "kl": "kalau", "pdhl": "padahal", "udah": "sudah",
    "jdwl": "jadwal", "pk": "pakai", "prnh": "pernah", "ky": "kayak",
    "trmksh": "terima kasih", "sbnrnya": "sebenarnya", "smpe": "sampai",
    "jdul": "judul", "cpt": "cepat", "tlpn": "telepon", "bsok": "besok",
    "sjk": "sejak", "gaada": "tidak ada", "gmna": "bagaimana", "mo": "mau",
    "bbrapa": "beberapa", "sdikit": "sedikit", "lbih": "lebih", "msh": "masih",
    "ntr": "nanti", "gtw": "gak tau", "kek": "seperti", "mnding": "mendingan",
    "aj": "saja", "ni": "ini", "da": "sudah", "ogut": "saya", "noob": "pemula",
    "mauu": "mau", "mls": "malas", "bngt": "banget", "mksd": "maksud",
    "trnyata": "ternyata", "cb": "coba", "blh": "boleh", "prcma": "percuma",
    "ktmu": "ketemu", "dlu": "dulu", "bener": "benar", "bner": "benar",
    "kesel": "kesal", "mendingan": "lebih baik", "biar": "supaya",
    "akhlak": "moral", "dah": "sudah", "skli": "sekali"
}

In [6]:
#Replace slang words with their standard equivalents.
def replaceSlang(text, slangwords):
    """Swap slang tokens for their standard forms.

    The text is split on whitespace and each token is looked up once in
    ``slangwords``; tokens without an entry are kept unchanged. Replacements
    are not looked up again, and a token mapped to the empty string leaves an
    empty slot (hence a doubled space) in the result.

    Args:
        text: the already-cleaned review string.
        slangwords: dict mapping a slang token to its replacement string.

    Returns:
        The tokens, after replacement, joined with single spaces.
    """
    normalized = []
    for token in text.split():
        normalized.append(slangwords.get(token, token))
    return ' '.join(normalized)

In [7]:
#Remove stopwords from the text (Stopword Removal)
def _buildStopwords():
    """Assemble the combined Indonesian + English + custom stopword set."""
    combined = set(stopwords.words("indonesian")) #Load Indonesian stopwords
    combined.update(stopwords.words("english")) #Add English stopwords
    combined.update([
        "iya", "yaa", "gak", "gk", "nya", "na", "sih", "ku", "di", "ga", "ya", "gaa", "loh", "kah", "woi", "woii", "woy",
        "dong", "deh", "nih", "tuh", "klo", "mah", "lho", "kan", "kayak", "banget", "aja", "kok", "sama", "gitu", "dah",
        "lah", "tau", "udah", "belum", "emang", "eh", "masa", "kayaknya", "soalnya", "gimana", "kenapa", "pokoknya",
        "apalagi", "terus", "mending", "bakal", "tapi", "padahal", "walaupun", "daripada", "abis", "doang", "sangat",
        "sekali", "lebih", "paling", "tetep", "tetapi", "sampe", "makanya", "ke", "buat", "biar", "hampir",
        "bukan", "malah", "meskipun", "mungkin", "so", "tp", "jd", "jg", "krn", "trs", "dmn", "hrs", "sy", "saya",
        "anda", "kamu", "kalian", "dia", "mereka", "kita", "aku", "gua", "gw", "lu", "loe"
    ]) #Add custom stopwords
    return combined


#Filled on the first filterText call; re-running this cell resets it.
_STOPWORDS_CACHE = None


def filterText(text):
    """Remove stopwords from a list of tokens (Stopword Removal).

    The stopword set (NLTK Indonesian and English lists plus a custom list)
    does not depend on the input, so it is built once on first use and reused
    afterwards instead of being reloaded from the corpus on every call.

    Args:
        text: iterable of word tokens.

    Returns:
        A new list with the tokens that are not stopwords, in original order.
    """
    global _STOPWORDS_CACHE
    if _STOPWORDS_CACHE is None:
        _STOPWORDS_CACHE = _buildStopwords()
    return [txt for txt in text if txt not in _STOPWORDS_CACHE]

In [8]:
#Stemming Process using Sastrawi Library
# Build a single stemmer instance up front; the pipeline passes this same
# object into stemmingText for every row instead of creating one per call.
factory = StemmerFactory()
stemmer = factory.create_stemmer()
def stemmingText(text, stemmer):
    """Stem every token in a list.

    Args:
        text: iterable of word tokens.
        stemmer: any object exposing ``stem(word)``; the notebook passes the
            Sastrawi stemmer created above.

    Returns:
        A list holding ``stemmer.stem(word)`` for each token, in order.
    """
    stemmed = []
    for token in text:
        stemmed.append(stemmer.stem(token))
    return stemmed

In [9]:
#Convert a list of words into sentences
def toSentence(list_words):
    """Join a list of word tokens back into one space-separated string.

    Args:
        list_words: iterable of str tokens.

    Returns:
        The tokens joined by single spaces ("" for an empty input).
    """
    separator = ' '
    sentence = separator.join(list_words)
    return sentence

## **4. Preprocessing New Sample Data**

In [10]:
#Sample reviews (raw, uncleaned Indonesian text) used as new data for inference.
#They are defined here only; cleaning and prediction happen in later cells.
new_texts = [
    "% Aplikasinya keren banget! Mantap.",
    "Sudah bagus, semangat terus yah devnya",
    "Saya suka tampilannya, simpel dan mudah digunakan.",
    "Susah masuk, tapi ya sudah gpp",
    "Payah banget aplikasinya !!!",
    "Login cuman disuruh masukin email aja kacauuu.",
    "Tidak ada masalah sejauh ini, bagus kok!",
]

In [11]:
#Full preprocessing pipeline: each step is stored in its own column so the
#intermediate results can be inspected.
df_new = pd.DataFrame({'ulasan': new_texts})

# Order matters: cleaning -> slang replacement (on the string) -> tokenization
# -> stopword removal -> stemming (both on token lists) -> re-join to a string.
df_new['txt_clean'] = df_new['ulasan'].apply(cleanText)
df_new['txt_slangwords'] = df_new['txt_clean'].apply(lambda x: replaceSlang(x, slangwords))
df_new['txt_tokenText'] = df_new['txt_slangwords'].apply(tokenText)
df_new['txt_stopword'] = df_new['txt_tokenText'].apply(filterText)
df_new['txt_stemming'] = df_new['txt_stopword'].apply(lambda x: stemmingText(x, stemmer))
df_new['fix_text'] = df_new['txt_stemming'].apply(toSentence)

clean_texts = df_new['fix_text'].tolist()

# Two feature representations of the same cleaned texts: TF-IDF for the
# SVM / Logistic Regression models, padded integer sequences for the LSTM.
new_tfidf = tfidf.transform(clean_texts)
new_sequences = tokenizer.texts_to_sequences(clean_texts)
# NOTE(review): maxlen=100 (and the default padding/truncating side) presumably
# has to match what was used when the LSTM was trained -- confirm.
new_padded = pad_sequences(new_sequences, maxlen=100)

## **5. Predicting Sentiment from New Data**

In [12]:
#Sentiment prediction with various models
# Predictions are used directly as indices into label_names.
# NOTE(review): this assumes every model emits integer class ids where
# 0=Negative, 1=Neutral, 2=Positive -- confirm against the training notebook.
label_names = ["Negative", "Neutral", "Positive"]
#Predict with SVM
svm_preds = svm_model.predict(new_tfidf)
svm_labels = [label_names[pred] for pred in svm_preds]
#Predict with Logistic Regression
logreg_preds = logreg_model.predict(new_tfidf)
logreg_labels = [label_names[pred] for pred in logreg_preds]
#Predict with LSTM
lstm_probs = lstm_model.predict(new_padded)
# Take the index of the highest-scoring column in each row as the predicted class.
lstm_preds = np.argmax(lstm_probs, axis=1)
lstm_labels = [label_names[pred] for pred in lstm_preds]
# Print the three predictions side by side for each original (raw) review.
for i, text in enumerate(new_texts):
    print(f"Review: {text}")
    print(f" - SVM Prediction: {svm_labels[i]}")
    print(f" - Logistic Regression Prediction: {logreg_labels[i]}")
    print(f" - LSTM Prediction: {lstm_labels[i]}")
    print("-"*50)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 851ms/step
Review: % Aplikasinya keren banget! Mantap.
 - SVM Prediction: Positive
 - Logistic Regression Prediction: Positive
 - LSTM Prediction: Positive
--------------------------------------------------
Review: Sudah bagus, semangat terus yah devnya
 - SVM Prediction: Positive
 - Logistic Regression Prediction: Positive
 - LSTM Prediction: Positive
--------------------------------------------------
Review: Saya suka tampilannya, simpel dan mudah digunakan.
 - SVM Prediction: Positive
 - Logistic Regression Prediction: Positive
 - LSTM Prediction: Positive
--------------------------------------------------
Review: Susah masuk, tapi ya sudah gpp
 - SVM Prediction: Neutral
 - Logistic Regression Prediction: Neutral
 - LSTM Prediction: Neutral
--------------------------------------------------
Review: Payah banget aplikasinya !!!
 - SVM Prediction: Negative
 - Logistic Regression Prediction: Negative
 - LSTM Prediction: Nega