## **Inference menghasilkan output berupa kelas kategorikal**

### **Import Package Pre-Processing**

In [1]:
!pip install Sastrawi



In [2]:
import re
import string
import csv
import requests
from io import StringIO
import pandas as pd
import numpy as np
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
import nltk
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

## **Loading Dataset dari CSV**

**Memasukkan kedalam dataframe agar mudah untuk dimanipulasi dan menjadi training model**

In [3]:
# Membaca dataset dari file CSV
app_reviews_df = pd.read_csv('https://raw.githubusercontent.com/Kanaieu/analisis-sentimen-playstore-wetv/main/wetv_reviews.csv')

# Menampilkan jumlah baris dan kolom dalam DataFrame
jumlah_ulasan, jumlah_kolom = app_reviews_df.shape

# Menampilkan beberapa baris pertama dari dataset
app_reviews_df.head()

Unnamed: 0,userName,score,content
0,Bukan Robot,1,aplikasi terjelek kebanyakan nonton iklan nya ...
1,marsha Tea,1,jangan kebanyakan iklan kasian yang ga berlang...
2,Wardi Jafar,1,Gak ada film indonesia jelek
3,Nen _tarak92,1,Aplikasi nya kenapa yah setelah di update kok ...
4,Alimurrosyid Budi Rohmansyah,1,burik


In [4]:
app_reviews_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12000 entries, 0 to 11999
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   userName  12000 non-null  object
 1   score     12000 non-null  int64 
 2   content   12000 non-null  object
dtypes: int64(1), object(2)
memory usage: 281.4+ KB


**Membersihkan dataframe dari Value NaN dan Duplicates**

In [5]:
# Membuat DataFrame baru (clean_df) dengan menghapus baris yang memiliki nilai yang hilang (NaN) dari app_reviews_df
clean_df = app_reviews_df.dropna()

# Menghapus baris duplikat dari DataFrame clean_df
clean_df = clean_df.drop_duplicates()

# Menghitung jumlah baris dan kolom dalam DataFrame clean_df setelah menghapus duplikat
jumlah_ulasan_setelah_hapus_duplikat, jumlah_kolom_setelah_hapus_duplikat = clean_df.shape

In [6]:
clean_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 11997 entries, 0 to 11999
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   userName  11997 non-null  object
 1   score     11997 non-null  int64 
 2   content   11997 non-null  object
dtypes: int64(1), object(2)
memory usage: 374.9+ KB


**Pre-Processing: cleaningText(text), casefoldingText(text), tokenizingText(text), filteringText(text), stemmingText(text), toSentence(list_words)**

In [7]:
def cleaningText(text):
    text = re.sub(r'@[A-Za-z0-9]+', '', text) # menghapus mention
    text = re.sub(r'#[A-Za-z0-9]+', '', text) # menghapus hashtag
    text = re.sub(r'RT[\s]', '', text) # menghapus RT
    text = re.sub(r"http\S+", '', text) # menghapus link
    text = re.sub(r'[0-9]+', '', text) # menghapus angka
    text = re.sub(r'[^\w\s]', '', text) # menghapus karakter selain huruf dan angka

    text = text.replace('\n', ' ') # mengganti baris baru dengan spasi
    text = text.translate(str.maketrans('', '', string.punctuation)) # menghapus semua tanda baca
    text = text.strip(' ') # menghapus karakter spasi dari kiri dan kanan teks
    return text

def casefoldingText(text): # Mengubah semua karakter dalam teks menjadi huruf kecil
    text = text.lower()
    return text

def tokenizingText(text): # Memecah atau membagi string, teks menjadi daftar token
    text = word_tokenize(text)
    return text

def filteringText(text): # Menghapus stopwords dalam teks
    listStopwords = set(stopwords.words('indonesian'))
    listStopwords1 = set(stopwords.words('english'))
    listStopwords.update(listStopwords1)
    listStopwords.update(['iya','yaa','gak','nya','na','sih','ku',"di","ga","ya","gaa","loh","kah","woi","woii","woy"])
    filtered = []
    for txt in text:
        if txt not in listStopwords:
            filtered.append(txt)
    text = filtered
    return text

def stemmingText(text): # Mengurangi kata ke bentuk dasarnya yang menghilangkan imbuhan awalan dan akhiran atau ke akar kata
    # Membuat objek stemmer
    factory = StemmerFactory()
    stemmer = factory.create_stemmer()

    # Memecah teks menjadi daftar kata
    words = text.split()

    # Menerapkan stemming pada setiap kata dalam daftar
    stemmed_words = [stemmer.stem(word) for word in words]

    # Menggabungkan kata-kata yang telah distem
    stemmed_text = ' '.join(stemmed_words)

    return stemmed_text

def toSentence(list_words): # Mengubah daftar kata menjadi kalimat
    sentence = ' '.join(word for word in list_words)
    return sentence

**Penghapusan kumpulan slang words atau kata-kata informal**

In [8]:
slangwords = {"@": "di", "abis": "habis", "wtb": "beli", "masi": "masih", "wts": "jual", "wtt": "tukar", "bgt": "banget", "maks": "maksimal", "plisss": "tolong", "bgttt": "banget", "indo": "indonesia", "bgtt": "banget", "ad": "ada", "rv": "redvelvet", "plis": "tolong", "pls": "tolong", "cr": "sumber", "cod": "bayar ditempat", "adlh": "adalah", "afaik": "as far as i know", "ahaha": "haha", "aj": "saja", "ajep-ajep": "dunia gemerlap", "ak": "saya", "akika": "aku", "akkoh": "aku", "akuwh": "aku", "alay": "norak", "alow": "halo", "ambilin": "ambilkan", "ancur": "hancur", "anjrit": "anjing", "anter": "antar", "ap2": "apa-apa", "apasih": "apa sih", "apes": "sial", "aps": "apa", "aq": "saya", "aquwh": "aku", "asbun": "asal bunyi", "aseekk": "asyik", "asekk": "asyik", "asem": "asam", "aspal": "asli tetapi palsu", "astul": "asal tulis", "ato": "atau", "au ah": "tidak mau tahu", "awak": "saya", "ay": "sayang", "ayank": "sayang", "b4": "sebelum", "bakalan": "akan", "bandes": "bantuan desa", "bangedh": "banget", "banpol": "bantuan polisi", "banpur": "bantuan tempur", "basbang": "basi", "bcanda": "bercanda", "bdg": "bandung", "begajulan": "nakal", "beliin": "belikan", "bencong": "banci", "bentar": "sebentar", "ber3": "bertiga", "beresin": "membereskan", "bete": "bosan", "beud": "banget", "bg": "abang", "bgmn": "bagaimana", "bgt": "banget", "bijimane": "bagaimana", "bintal": "bimbingan mental", "bkl": "akan", "bknnya": "bukannya", "blegug": "bodoh", "blh": "boleh", "bln": "bulan", "blum": "belum", "bnci": "benci", "bnran": "yang benar", "bodor": "lucu", "bokap": "ayah", "boker": "buang air besar", "bokis": "bohong", "boljug": "boleh juga", "bonek": "bocah nekat", "boyeh": "boleh", "br": "baru", "brg": "bareng", "bro": "saudara laki-laki", "bru": "baru", "bs": "bisa", "bsen": "bosan", "bt": "buat", "btw": "ngomong-ngomong", "buaya": "tidak setia", "bubbu": "tidur", "bubu": "tidur", "bumil": "ibu hamil", "burik": "buruk", "bw": "bawa", "bwt": "buat", "byk": "banyak", "byrin": "bayarkan", "cabal": "sabar", "cadas": "keren", "calo": "makelar", "can": "belum", "capcus": "pergi", "caper": "cari perhatian", "ce": "cewek", "cekal": "cegah tangkal", "cemen": "penakut", "cengengesan": "tertawa", "cepet": "cepat", "cew": "cewek", "chuyunk": "sayang", "cimeng": "ganja", "cipika cipiki": "cium pipi kanan cium pipi kiri", "ciyh": "sih", "ckepp": "cakep", "ckp": "cakep", "cmiiw": "correct me if i'm wrong", "cmpur": "campur", "cong": "banci", "conlok": "cinta lokasi", "cowwyy": "maaf", "cp": "siapa", "cpe": "capek", "cppe": "capek", "cucok": "cocok", "cuex": "cuek", "cumi": "Cuma miscall", "cups": "culun", "curanmor": "pencurian kendaraan bermotor", "curcol": "curahan hati colongan", "cwek": "cewek", "cyin": "cinta", "d": "di", "dah": "deh", "dapet": "dapat", "de": "adik", "dek": "adik", "demen": "suka", "deyh": "deh", "dgn": "dengan", "diancurin": "dihancurkan", "dimaafin": "dimaafkan", "dimintak": "diminta", "disono": "di sana", "dket": "dekat", "dkk": "dan kawan-kawan", "dll": "dan lain-lain", "dlu": "dulu", "dngn": "dengan", "dodol": "bodoh", "doku": "uang", "dongs": "dong", "dpt": "dapat", "dri": "dari", "drmn": "darimana", "drtd": "dari tadi", "dst": "dan seterusnya", "dtg": "datang", "duh": "aduh", "duren": "durian", "ed": "edisi", "egp": "emang gue pikirin", "eke": "aku", "elu": "kamu", "emangnya": "memangnya", "emng": "memang", "endak": "tidak", "enggak": "tidak", "envy": "iri", "ex": "mantan", "fax": "facsimile", "fifo": "first in first out", "folbek": "follow back", "fyi": "sebagai informasi", "gaada": "tidak ada uang", "gag": "tidak", "gaje": "tidak jelas", "gak papa": "tidak apa-apa", "gan": "juragan", "gaptek": "gagap teknologi", "gatek": "gagap teknologi", "gawe": "kerja", "gbs": "tidak bisa", "gebetan": "orang yang disuka", "geje": "tidak jelas", "gepeng": "gelandangan dan pengemis", "ghiy": "lagi", "gile": "gila", "gimana": "bagaimana", "gino": "gigi nongol", "githu": "gitu", "gj": "tidak jelas", "gmana": "bagaimana", "gn": "begini", "goblok": "bodoh", "golput": "golongan putih", "gowes": "mengayuh sepeda", "gpny": "tidak punya", "gr": "gede rasa", "gretongan": "gratisan", "gtau": "tidak tahu", "gua": "saya", "guoblok": "goblok", "gw": "saya", "ha": "tertawa", "haha": "tertawa", "hallow": "halo", "hankam": "pertahanan dan keamanan", "hehe": "he", "helo": "halo", "hey": "hai", "hlm": "halaman", "hny": "hanya", "hoax": "isu bohong", "hr": "hari", "hrus": "harus", "hubdar": "perhubungan darat", "huff": "mengeluh", "hum": "rumah", "humz": "rumah", "ilang": "hilang", "ilfil": "tidak suka", "imho": "in my humble opinion", "imoetz": "imut", "item": "hitam", "itungan": "hitungan", "iye": "iya", "ja": "saja", "jadiin": "jadi", "jaim": "jaga image", "jayus": "tidak lucu", "jdi": "jadi", "jem": "jam", "jga": "juga", "jgnkan": "jangankan", "jir": "anjing", "jln": "jalan", "jomblo": "tidak punya pacar", "jubir": "juru bicara", "jutek": "galak", "k": "ke", "kab": "kabupaten", "kabor": "kabur", "kacrut": "kacau", "kadiv": "kepala divisi", "kagak": "tidak", "kalo": "kalau", "kampret": "sialan", "kamtibmas": "keamanan dan ketertiban masyarakat", "kamuwh": "kamu", "kanwil": "kantor wilayah", "karna": "karena", "kasubbag": "kepala subbagian", "katrok": "kampungan", "kayanya": "kayaknya", "kbr": "kabar", "kdu": "harus", "kec": "kecamatan", "kejurnas": "kejuaraan nasional", "kekeuh": "keras kepala", "kel": "kelurahan", "kemaren": "kemarin", "kepengen": "mau", "kepingin": "mau", "kepsek": "kepala sekolah", "kesbang": "kesatuan bangsa", "kesra": "kesejahteraan rakyat", "ketrima": "diterima", "kgiatan": "kegiatan", "kibul": "bohong", "kimpoi": "kawin", "kl": "kalau", "klianz": "kalian", "kloter": "kelompok terbang", "klw": "kalau", "km": "kamu", "kmps": "kampus", "kmrn": "kemarin", "knal": "kenal", "knp": "kenapa", "kodya": "kota madya", "komdis": "komisi disiplin", "komsov": "komunis sovyet", "kongkow": "kumpul bareng teman-teman", "kopdar": "kopi darat", "korup": "korupsi", "kpn": "kapan", "krenz": "keren", "krm": "kirim", "kt": "kita", "ktmu": "ketemu", "ktr": "kantor", "kuper": "kurang pergaulan", "kw": "imitasi", "kyk": "seperti", "la": "lah", "lam": "salam", "lamp": "lampiran", "lanud": "landasan udara", "latgab": "latihan gabungan", "lebay": "berlebihan", "leh": "boleh", "lelet": "lambat", "lemot": "lambat", "lgi": "lagi", "lgsg": "langsung", "liat": "lihat", "litbang": "penelitian dan pengembangan", "lmyn": "lumayan", "lo": "kamu", "loe": "kamu", "lola": "lambat berfikir", "louph": "cinta", "low": "kalau", "lp": "lupa", "luber": "langsung, umum, bebas, dan rahasia", "luchuw": "lucu", "lum": "belum", "luthu": "lucu", "lwn": "lawan", "maacih": "terima kasih", "mabal": "bolos", "macem": "macam", "macih": "masih", "maem": "makan", "magabut": "makan gaji buta", "maho": "homo", "mak jang": "kaget", "maksain": "memaksa", "malem": "malam", "mam": "makan", "maneh": "kamu", "maniez": "manis", "mao": "mau", "masukin": "masukkan", "melu": "ikut", "mepet": "dekat sekali", "mgu": "minggu", "migas": "minyak dan gas bumi", "mikol": "minuman beralkohol", "miras": "minuman keras", "mlah": "malah", "mngkn": "mungkin", "mo": "mau", "mokad": "mati", "moso": "masa", "mpe": "sampai", "msk": "masuk", "mslh": "masalah", "mt": "makan teman", "mubes": "musyawarah besar", "mulu": "melulu", "mumpung": "selagi", "munas": "musyawarah nasional", "muntaber": "muntah dan berak", "musti": "mesti", "muupz": "maaf", "mw": "now watching", "n": "dan", "nanam": "menanam", "nanya": "bertanya", "napa": "kenapa", "napi": "narapidana", "napza": "narkotika, alkohol, psikotropika, dan zat adiktif ", "narkoba": "narkotika, psikotropika, dan obat terlarang", "nasgor": "nasi goreng", "nda": "tidak", "ndiri": "sendiri", "ne": "ini", "nekolin": "neokolonialisme", "nembak": "menyatakan cinta", "ngabuburit": "menunggu berbuka puasa", "ngaku": "mengaku", "ngambil": "mengambil", "nganggur": "tidak punya pekerjaan", "ngapah": "kenapa", "ngaret": "terlambat", "ngasih": "memberikan", "ngebandel": "berbuat bandel", "ngegosip": "bergosip", "ngeklaim": "mengklaim", "ngeksis": "menjadi eksis", "ngeles": "berkilah", "ngelidur": "menggigau", "ngerampok": "merampok", "ngga": "tidak", "ngibul": "berbohong", "ngiler": "mau", "ngiri": "iri", "ngisiin": "mengisikan", "ngmng": "bicara", "ngomong": "bicara", "ngubek2": "mencari-cari", "ngurus": "mengurus", "nie": "ini", "nih": "ini", "niyh": "nih", "nmr": "nomor", "nntn": "nonton", "nobar": "nonton bareng", "np": "now playing", "ntar": "nanti", "ntn": "nonton", "numpuk": "bertumpuk", "nutupin": "menutupi", "nyari": "mencari", "nyekar": "menyekar", "nyicil": "mencicil", "nyoblos": "mencoblos", "nyokap": "ibu", "ogah": "tidak mau", "ol": "online", "ongkir": "ongkos kirim", "oot": "out of topic", "org2": "orang-orang", "ortu": "orang tua", "otda": "otonomi daerah", "otw": "on the way, sedang di jalan", "pacal": "pacar", "pake": "pakai", "pala": "kepala", "pansus": "panitia khusus", "parpol": "partai politik", "pasutri": "pasangan suami istri", "pd": "pada", "pede": "percaya diri", "pelatnas": "pemusatan latihan nasional", "pemda": "pemerintah daerah", "pemkot": "pemerintah kota", "pemred": "pemimpin redaksi", "penjas": "pendidikan jasmani", "perda": "peraturan daerah", "perhatiin": "perhatikan", "pesenan": "pesanan", "pgang": "pegang", "pi": "tapi", "pilkada": "pemilihan kepala daerah", "pisan": "sangat", "pk": "penjahat kelamin", "plg": "paling", "pmrnth": "pemerintah", "polantas": "polisi lalu lintas", "ponpes": "pondok pesantren", "pp": "pulang pergi", "prg": "pergi", "prnh": "pernah", "psen": "pesan", "pst": "pasti", "pswt": "pesawat", "pw": "posisi nyaman", "qmu": "kamu", "rakor": "rapat koordinasi", "ranmor": "kendaraan bermotor", "re": "reply", "ref": "referensi", "rehab": "rehabilitasi", "rempong": "sulit", "repp": "balas", "restik": "reserse narkotika", "rhs": "rahasia", "rmh": "rumah", "ru": "baru", "ruko": "rumah toko", "rusunawa": "rumah susun sewa", "ruz": "terus", "saia": "saya", "salting": "salah tingkah", "sampe": "sampai", "samsek": "sama sekali", "sapose": "siapa", "satpam": "satuan pengamanan", "sbb": "sebagai berikut", "sbh": "sebuah", "sbnrny": "sebenarnya", "scr": "secara", "sdgkn": "sedangkan", "sdkt": "sedikit", "se7": "setuju", "sebelas dua belas": "mirip", "sembako": "sembilan bahan pokok", "sempet": "sempat", "sendratari": "seni drama tari", "sgt": "sangat", "shg": "sehingga", "siech": "sih", "sikon": "situasi dan kondisi", "sinetron": "sinema elektronik", "siramin": "siramkan", "sj": "saja", "skalian": "sekalian", "sklh": "sekolah", "skt": "sakit", "slesai": "selesai", "sll": "selalu", "slma": "selama", "slsai": "selesai", "smpt": "sempat", "smw": "semua", "sndiri": "sendiri", "soljum": "sholat jumat", "songong": "sombong", "sory": "maaf", "sosek": "sosial-ekonomi", "sotoy": "sok tahu", "spa": "siapa", "sppa": "siapa", "spt": "seperti", "srtfkt": "sertifikat", "stiap": "setiap", "stlh": "setelah", "suk": "masuk", "sumpek": "sempit", "syg": "sayang", "t4": "tempat", "tajir": "kaya", "tau": "tahu", "taw": "tahu", "td": "tadi", "tdk": "tidak", "teh": "kakak perempuan", "telat": "terlambat", "telmi": "telat berpikir", "temen": "teman", "tengil": "menyebalkan", "tepar": "terkapar", "tggu": "tunggu", "tgu": "tunggu", "thankz": "terima kasih", "thn": "tahun", "tilang": "bukti pelanggaran", "tipiwan": "TvOne", "tks": "terima kasih", "tlp": "telepon", "tls": "tulis", "tmbah": "tambah", "tmen2": "teman-teman", "tmpah": "tumpah", "tmpt": "tempat", "tngu": "tunggu", "tnyta": "ternyata", "tokai": "tai", "toserba": "toko serba ada", "tpi": "tapi", "trdhulu": "terdahulu", "trima": "terima kasih", "trm": "terima", "trs": "terus", "trutama": "terutama", "ts": "penulis", "tst": "tahu sama tahu", "ttg": "tentang", "tuch": "tuh", "tuir": "tua", "tw": "tahu", "u": "kamu", "ud": "sudah", "udah": "sudah", "ujg": "ujung", "ul": "ulangan", "unyu": "lucu", "uplot": "unggah", "urang": "saya", "usah": "perlu", "utk": "untuk", "valas": "valuta asing", "w/": "dengan", "wadir": "wakil direktur", "wamil": "wajib militer", "warkop": "warung kopi", "warteg": "warung tegal", "wat": "buat", "wkt": "waktu", "wtf": "what the fuck", "xixixi": "tertawa", "ya": "iya", "yap": "iya", "yaudah": "ya sudah", "yawdah": "ya sudah", "yg": "yang", "yl": "yang lain", "yo": "iya", "yowes": "ya sudah", "yup": "iya", "7an": "tujuan", "ababil": "abg labil", "acc": "accord", "adlah": "adalah", "adoh": "aduh", "aha": "tertawa", "aing": "saya", "aja": "saja", "ajj": "saja", "aka": "dikenal juga sebagai", "akko": "aku", "akku": "aku", "akyu": "aku", "aljasa": "asal jadi saja", "ama": "sama", "ambl": "ambil", "anjir": "anjing", "ank": "anak", "ap": "apa", "apaan": "apa", "ape": "apa", "aplot": "unggah", "apva": "apa", "aqu": "aku", "asap": "sesegera mungkin", "aseek": "asyik", "asek": "asyik", "aseknya": "asyiknya", "asoy": "asyik", "astrojim": "astagfirullahaladzim", "ath": "kalau begitu", "atuh": "kalau begitu", "ava": "avatar", "aws": "awas", "ayang": "sayang", "ayok": "ayo", "bacot": "banyak bicara", "bales": "balas", "bangdes": "pembangunan desa", "bangkotan": "tua", "banpres": "bantuan presiden", "bansarkas": "bantuan sarana kesehatan", "bazis": "badan amal, zakat, infak, dan sedekah", "bcoz": "karena", "beb": "sayang", "bejibun": "banyak", "belom": "belum", "bener": "benar", "ber2": "berdua", "berdikari": "berdiri di atas kaki sendiri", "bet": "banget", "beti": "beda tipis", "beut": "banget", "bgd": "banget", "bgs": "bagus", "bhubu": "tidur", "bimbuluh": "bimbingan dan penyuluhan", "bisi": "kalau-kalau", "bkn": "bukan", "bl": "beli", "blg": "bilang", "blm": "belum", "bls": "balas", "bnchi": "benci", "bngung": "bingung", "bnyk": "banyak", "bohay": "badan aduhai", "bokep": "porno", "bokin": "pacar", "bole": "boleh", "bolot": "bodoh", "bonyok": "ayah ibu", "bpk": "bapak", "brb": "segera kembali", "brngkt": "berangkat", "brp": "berapa", "brur": "saudara laki-laki", "bsa": "bisa", "bsk": "besok", "bu_bu": "tidur", "bubarin": "bubarkan", "buber": "buka bersama", "bujubune": "luar biasa", "buser": "buru sergap", "bwhn": "bawahan", "byar": "bayar", "byr": "bayar", "c8": "chat", "cabut": "pergi", "caem": "cakep", "cama-cama": "sama-sama", "cangcut": "celana dalam", "cape": "capek", "caur": "jelek", "cekak": "tidak ada uang", "cekidot": "coba lihat", "cemplungin": "cemplungkan", "ceper": "pendek", "ceu": "kakak perempuan", "cewe": "cewek", "cibuk": "sibuk", "cin": "cinta", "ciye": "cie", "ckck": "ck", "clbk": "cinta lama bersemi kembali", "cmpr": "campur", "cnenk": "senang", "congor": "mulut", "cow": "cowok", "coz": "karena", "cpa": "siapa", "gokil": "gila", "gombal": "suka merayu", "gpl": "tidak pakai lama", "gpp": "tidak apa-apa", "gretong": "gratis", "gt": "begitu", "gtw": "tidak tahu", "gue": "saya", "guys": "teman-teman", "gws": "cepat sembuh", "haghaghag": "tertawa", "hakhak": "tertawa", "handak": "bahan peledak", "hansip": "pertahanan sipil", "hellow": "halo", "helow": "halo", "hi": "hai", "hlng": "hilang", "hnya": "hanya", "houm": "rumah", "hrs": "harus", "hubad": "hubungan angkatan darat", "hubla": "perhubungan laut", "huft": "mengeluh", "humas": "hubungan masyarakat", "idk": "saya tidak tahu", "ilfeel": "tidak suka", "imba": "jago sekali", "imoet": "imut", "info": "informasi", "itung": "hitung", "isengin": "bercanda", "iyala": "iya lah", "iyo": "iya", "jablay": "jarang dibelai", "jadul": "jaman dulu", "jancuk": "anjing", "jd": "jadi", "jdikan": "jadikan", "jg": "juga", "jgn": "jangan", "jijay": "jijik", "jkt": "jakarta", "jnj": "janji", "jth": "jatuh", "jurdil": "jujur adil", "jwb": "jawab", "ka": "kakak", "kabag": "kepala bagian", "kacian": "kasihan", "kadit": "kepala direktorat", "kaga": "tidak", "kaka": "kakak", "kamtib": "keamanan dan ketertiban", "kamuh": "kamu", "kamyu": "kamu", "kapt": "kapten", "kasat": "kepala satuan", "kasubbid": "kepala subbidang", "kau": "kamu", "kbar": "kabar", "kcian": "kasihan", "keburu": "terlanjur", "kedubes": "kedutaan besar", "kek": "seperti", "keknya": "kayaknya", "keliatan": "kelihatan", "keneh": "masih", "kepikiran": "terpikirkan", "kepo": "mau tahu urusan orang", "kere": "tidak punya uang", "kesian": "kasihan", "ketauan": "ketahuan", "keukeuh": "keras kepala", "khan": "kan", "kibus": "kaki busuk", "kk": "kakak", "klian": "kalian", "klo": "kalau", "kluarga": "keluarga", "klwrga": "keluarga", "kmari": "kemari", "kmpus": "kampus", "kn": "kan", "knl": "kenal", "knpa": "kenapa", "kog": "kok", "kompi": "komputer", "komtiong": "komunis Tiongkok", "konjen": "konsulat jenderal", "koq": "kok", "kpd": "kepada", "kptsan": "keputusan", "krik": "garing", "krn": "karena", "ktauan": "ketahuan", "ktny": "katanya", "kudu": "harus", "kuq": "kok", "ky": "seperti", "kykny": "kayanya", "laka": "kecelakaan", "lambreta": "lambat", "lansia": "lanjut usia", "lapas": "lembaga pemasyarakatan", "lbur": "libur", "lekong": "laki-laki", "lg": "lagi", "lgkp": "lengkap", "lht": "lihat", "linmas": "perlindungan masyarakat", "lmyan": "lumayan", "lngkp": "lengkap", "loch": "loh", "lol": "tertawa", "lom": "belum", "loupz": "cinta", "lowh": "kamu", "lu": "kamu", "luchu": "lucu", "luff": "cinta", "luph": "cinta", "lw": "kamu", "lwt": "lewat", "maaciw": "terima kasih", "mabes": "markas besar", "macem-macem": "macam-macam", "madesu": "masa depan suram", "maen": "main", "mahatma": "maju sehat bersama", "mak": "ibu", "makasih": "terima kasih", "malah": "bahkan", "malu2in": "memalukan", "mamz": "makan", "manies": "manis", "mantep": "mantap", "markus": "makelar kasus", "mba": "mbak", "mending": "lebih baik", "mgkn": "mungkin", "mhn": "mohon", "miker": "minuman keras", "milis": "mailing list", "mksd": "maksud", "mls": "malas", "mnt": "minta", "moge": "motor gede", "mokat": "mati", "mosok": "masa", "msh": "masih", "mskpn": "meskipun", "msng2": "masing-masing", "muahal": "mahal", "muker": "musyawarah kerja", "mumet": "pusing", "muna": "munafik", "munaslub": "musyawarah nasional luar biasa", "musda": "musyawarah daerah", "muup": "maaf", "muuv": "maaf", "nal": "kenal", "nangis": "menangis", "naon": "apa", "napol": "narapidana politik", "naq": "anak", "narsis": "bangga pada diri sendiri", "nax": "anak", "ndak": "tidak", "ndut": "gendut", "nekolim": "neokolonialisme", "nelfon": "menelepon", "ngabis2in": "menghabiskan", "ngakak": "tertawa", "ngambek": "marah", "ngampus": "pergi ke kampus", "ngantri": "mengantri", "ngapain": "sedang apa", "ngaruh": "berpengaruh", "ngawur": "berbicara sembarangan", "ngeceng": "kumpul bareng-bareng", "ngeh": "sadar", "ngekos": "tinggal di kos", "ngelamar": "melamar", "ngeliat": "melihat", "ngemeng": "bicara terus-terusan", "ngerti": "mengerti", "nggak": "tidak", "ngikut": "ikut", "nginep": "menginap", "ngisi": "mengisi", "ngmg": "bicara", "ngocol": "lucu", "ngomongin": "membicarakan", "ngumpul": "berkumpul", "ni": "ini", "nyasar": "tersesat", "nyariin": "mencari", "nyiapin": "mempersiapkan", "nyiram": "menyiram", "nyok": "ayo", "o/": "oleh", "ok": "ok", "priksa": "periksa", "pro": "profesional", "psn": "pesan", "psti": "pasti", "puanas": "panas", "qmo": "kamu", "qt": "kita", "rame": "ramai", "raskin": "rakyat miskin", "red": "redaksi", "reg": "register", "rejeki": "rezeki", "renstra": "rencana strategis", "reskrim": "reserse kriminal", "sni": "sini", "somse": "sombong sekali", "sorry": "maaf", "sosbud": "sosial-budaya", "sospol": "sosial-politik", "sowry": "maaf", "spd": "sepeda", "sprti": "seperti", "spy": "supaya", "stelah": "setelah", "subbag": "subbagian", "sumbangin": "sumbangkan", "sy": "saya", "syp": "siapa", "tabanas": "tabungan pembangunan nasional", "tar": "nanti", "taun": "tahun", "tawh": "tahu", "tdi": "tadi", "te2p": "tetap", "tekor": "rugi", "telkom": "telekomunikasi", "telp": "telepon", "temen2": "teman-teman", "tengok": "menjenguk", "terbitin": "terbitkan", "tgl": "tanggal", "thanks": "terima kasih", "thd": "terhadap", "thx": "terima kasih", "tipi": "TV", "tkg": "tukang", "tll": "terlalu", "tlpn": "telepon", "tman": "teman", "tmbh": "tambah", "tmn2": "teman-teman", "tmph": "tumpah", "tnda": "tanda", "tnh": "tanah", "togel": "toto gelap", "tp": "tapi", "tq": "terima kasih", "trgntg": "tergantung", "trims": "terima kasih", "cb": "coba", "y": "ya", "munfik": "munafik", "reklamuk": "reklamasi", "sma": "sama", "tren": "trend", "ngehe": "kesal", "mz": "mas", "analisise": "analisis", "sadaar": "sadar", "sept": "september", "nmenarik": "menarik", "zonk": "bodoh", "rights": "benar", "simiskin": "miskin", "ngumpet": "sembunyi", "hardcore": "keras", "akhirx": "akhirnya", "solve": "solusi", "watuk": "batuk", "ngebully": "intimidasi", "masy": "masyarakat", "still": "masih", "tauk": "tahu", "mbual": "bual", "tioghoa": "tionghoa", "ngentotin": "senggama", "kentot": "senggama", "faktakta": "fakta", "sohib": "teman", "rubahnn": "rubah", "trlalu": "terlalu", "nyela": "cela", "heters": "pembenci", "nyembah": "sembah", "most": "paling", "ikon": "lambang", "light": "terang", "pndukung": "pendukung", "setting": "atur", "seting": "akting", "next": "lanjut", "waspadalah": "waspada", "gantengsaya": "ganteng", "parte": "partai", "nyerang": "serang", "nipu": "tipu", "ktipu": "tipu", "jentelmen": "berani", "buangbuang": "buang", "tsangka": "tersangka", "kurng": "kurang", "ista": "nista", "less": "kurang", "koar": "teriak", "paranoid": "takut", "problem": "masalah", "tahi": "kotoran", "tirani": "tiran", "tilep": "tilap", "happy": "bahagia", "tak": "tidak", "penertiban": "tertib", "uasai": "kuasa", "mnolak": "tolak", "trending": "trend", "taik": "tahi", "wkwkkw": "tertawa", "ahokncc": "ahok", "istaa": "nista", "benarjujur": "jujur", "mgkin": "mungkin"}
def fix_slangwords(text):
    words = text.split()
    fixed_words = []

    for word in words:
        if word.lower() in slangwords:
            fixed_words.append(slangwords[word.lower()])
        else:
            fixed_words.append(word)

    fixed_text = ' '.join(fixed_words)
    return fixed_text

**Mengaplikasikan semua function ke clean_df**

In [9]:
# Membersihkan teks dan menyimpannya di kolom 'text_clean'
clean_df['text_clean'] = clean_df['content'].apply(cleaningText)

# Mengubah huruf dalam teks menjadi huruf kecil dan menyimpannya di 'text_casefoldingText'
clean_df['text_casefoldingText'] = clean_df['text_clean'].apply(casefoldingText)

# Mengganti kata-kata slang dengan kata-kata standar dan menyimpannya di 'text_slangwords'
clean_df['text_slangwords'] = clean_df['text_casefoldingText'].apply(fix_slangwords)

# Memecah teks menjadi token (kata-kata) dan menyimpannya di 'text_tokenizingText'
clean_df['text_tokenizingText'] = clean_df['text_slangwords'].apply(tokenizingText)

# Menghapus kata-kata stop (kata-kata umum) dan menyimpannya di 'text_stopword'
clean_df['text_stopword'] = clean_df['text_tokenizingText'].apply(filteringText)

# Menggabungkan token-token menjadi kalimat dan menyimpannya di 'text_akhir'
clean_df['text_akhir'] = clean_df['text_stopword'].apply(toSentence)

In [10]:
clean_df.head()

Unnamed: 0,userName,score,content,text_clean,text_casefoldingText,text_slangwords,text_tokenizingText,text_stopword,text_akhir
0,Bukan Robot,1,aplikasi terjelek kebanyakan nonton iklan nya ...,aplikasi terjelek kebanyakan nonton iklan nya ...,aplikasi terjelek kebanyakan nonton iklan nya ...,aplikasi terjelek kebanyakan nonton iklan nya ...,"[aplikasi, terjelek, kebanyakan, nonton, iklan...","[aplikasi, terjelek, kebanyakan, nonton, iklan...",aplikasi terjelek kebanyakan nonton iklan dera...
1,marsha Tea,1,jangan kebanyakan iklan kasian yang ga berlang...,jangan kebanyakan iklan kasian yang ga berlang...,jangan kebanyakan iklan kasian yang ga berlang...,jangan kebanyakan iklan kasian yang ga berlang...,"[jangan, kebanyakan, iklan, kasian, yang, ga, ...","[kebanyakan, iklan, kasian, berlangganan, vip]",kebanyakan iklan kasian berlangganan vip
2,Wardi Jafar,1,Gak ada film indonesia jelek,Gak ada film indonesia jelek,gak ada film indonesia jelek,gak ada film indonesia jelek,"[gak, ada, film, indonesia, jelek]","[film, indonesia, jelek]",film indonesia jelek
3,Nen _tarak92,1,Aplikasi nya kenapa yah setelah di update kok ...,Aplikasi nya kenapa yah setelah di update kok ...,aplikasi nya kenapa yah setelah di update kok ...,aplikasi nya kenapa yah setelah di update kok ...,"[aplikasi, nya, kenapa, yah, setelah, di, upda...","[aplikasi, yah, update]",aplikasi yah update
4,Alimurrosyid Budi Rohmansyah,1,burik,burik,burik,buruk,[buruk],[buruk],buruk


### **Pelabelan Review apakah positif, neutral, atau negatif**

In [11]:
# Membaca data kamus kata-kata positif dari GitHub
lexicon_positive = dict()

response = requests.get('https://raw.githubusercontent.com/angelmetanosaa/dataset/main/lexicon_positive.csv')
# Mengirim permintaan HTTP untuk mendapatkan file CSV dari GitHub

if response.status_code == 200:
    # Jika permintaan berhasil
    reader = csv.reader(StringIO(response.text), delimiter=',')
    # Membaca teks respons sebagai file CSV menggunakan pembaca CSV dengan pemisah koma

    for row in reader:
        # Mengulangi setiap baris dalam file CSV
        lexicon_positive[row[0]] = int(row[1])
        # Menambahkan kata-kata positif dan skornya ke dalam kamus lexicon_positive
else:
    print("Failed to fetch positive lexicon data")

# Membaca data kamus kata-kata negatif dari GitHub
lexicon_negative = dict()

response = requests.get('https://raw.githubusercontent.com/angelmetanosaa/dataset/main/lexicon_negative.csv')
# Mengirim permintaan HTTP untuk mendapatkan file CSV dari GitHub

if response.status_code == 200:
    # Jika permintaan berhasil
    reader = csv.reader(StringIO(response.text), delimiter=',')
    # Membaca teks respons sebagai file CSV menggunakan pembaca CSV dengan pemisah koma

    for row in reader:
        # Mengulangi setiap baris dalam file CSV
        lexicon_negative[row[0]] = int(row[1])
        # Menambahkan kata-kata negatif dan skornya dalam kamus lexicon_negative
else:
    print("Failed to fetch negative lexicon data")

**Melakukan analisis sentimen pada teks berbahasa Indonesia menggunakan kamus kata-kata positif dan negatif.**

In [12]:
# Fungsi untuk menentukan polaritas sentimen dari tweet

def sentiment_analysis_lexicon_indonesia(text):
    #for word in text:

    score = 0
    # Inisialisasi skor sentimen ke 0

    for word in text:
        # Mengulangi setiap kata dalam teks

        if (word in lexicon_positive):
            score = score + lexicon_positive[word]
            # Jika kata ada dalam kamus positif, tambahkan skornya ke skor sentimen

    for word in text:
        # Mengulangi setiap kata dalam teks (sekali lagi)

        if (word in lexicon_negative):
            score = score + lexicon_negative[word]
            # Jika kata ada dalam kamus negatif, kurangkan skornya dari skor sentimen

    polarity=''
    # Inisialisasi variabel polaritas

    if (score > 0.5):
        polarity = 'positive'
        # Jika skor sentimen lebih besar atau sama dengan 0, maka polaritas adalah positif
    elif (score < -0.5):
        polarity = 'negative'
        # Jika skor sentimen kurang dari 0, maka polaritas adalah negatif
    else:
        polarity = 'neutral'
    # Ini adalah bagian yang bisa digunakan untuk menentukan polaritas netral jika diperlukan

    return score, polarity
    # Mengembalikan skor sentimen dan polaritas teks

**Aplikasi polarity ke clean_df**

In [13]:
results = clean_df['text_stopword'].apply(sentiment_analysis_lexicon_indonesia)
results = list(zip(*results))
clean_df['polarity_score'] = results[0]
clean_df['polarity'] = results[1]
print(clean_df['polarity'].value_counts())

polarity
negative    5583
positive    3725
neutral     2689
Name: count, dtype: int64


## **Ekstraksi Fitur**

**Menggunakan TF-IDF**

In [14]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

# Pisahkan data menjadi fitur (tweet) dan label (sentimen)
X = clean_df['text_akhir']
y = clean_df['polarity']

# Ekstraksi fitur dengan TF-IDF
tfidf = TfidfVectorizer(ngram_range=(1,2), max_features=5000, min_df=17, max_df=0.8 ) # Baru
# tfidf = TfidfVectorizer(max_features=200, min_df=17, max_df=0.8 ) # Asal
X_tfidf = tfidf.fit_transform(X)

# Konversi hasil ekstraksi fitur menjadi dataframe
features_tfidf_df = pd.DataFrame(X_tfidf.toarray(), columns=tfidf.get_feature_names_out())

# Menampilkan hasil ekstraksi fitur
features_tfidf_df

# Bagi data menjadi data latih dan data uji
X_tfidf_train, X_tfidf_test, y_tfidf_train, y_tfidf_test = train_test_split(X_tfidf, y, test_size=0.2, random_state=42)

**Menggunakan BERT (Bidirectional Encoder Representations from Transformers)**

In [15]:
from transformers import BertTokenizer, BertModel
import torch


In [16]:
# Load BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Menghitung panjang token untuk setiap teks dalam dataset
token_lengths = clean_df['text_akhir'].apply(lambda x: len(tokenizer.tokenize(x)))

# Menampilkan 5 teks dengan panjang token terbesar
longest_texts = clean_df.iloc[token_lengths.idxmax()]  # Ambil teks terpanjang berdasarkan token length
print("Teks terpanjang:")
print(longest_texts)

# Menampilkan statistik distribusi panjang token
print("Statistik panjang token:")
print(token_lengths.describe())

# Menampilkan panjang token teks dengan panjang token terbesar
print("Panjang token teks terpanjang:", token_lengths.max())

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Teks terpanjang:
userName                                             Quthubul Aktab Putra
score                                                                   5
content                 Wetv top bgt,tapi kalo bisa cepatin dong upgra...
text_clean              Wetv top bgttapi kalo bisa cepatin dong upgrad...
text_casefoldingText    wetv top bgttapi kalo bisa cepatin dong upgrad...
text_slangwords         wetv top bgttapi kalau bisa cepatin dong upgra...
text_tokenizingText     [wetv, top, bgttapi, kalau, bisa, cepatin, don...
text_stopword           [wetv, top, bgttapi, cepatin, upgrade, film, a...
text_akhir              wetv top bgttapi cepatin upgrade film akufilm ...
polarity_score                                                          5
polarity                                                         positive
Name: 11490, dtype: object
Statistik panjang token:
count    11997.000000
mean        17.116779
std         16.930722
min          0.000000
25%          5.000000
50%    

In [17]:
# Memuat tokenizer dan model pre-trained BERT
tokenizer = BertTokenizer.from_pretrained('indolem/indobert-base-uncased')  # Gunakan model Indonesia jika diperlukan
model = BertModel.from_pretrained('indolem/indobert-base-uncased')

# Tentukan perangkat (GPU atau CPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Pindahkan model ke perangkat yang sesuai (GPU atau CPU)
model = model.to(device)

# Fungsi untuk mengubah teks menjadi vektor menggunakan BERT
def encode(text):
    # Tokenisasi teks dan konversi ke tensor
    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=137)
    inputs = {key: value.to(device) for key, value in inputs.items()}

    # Forward pass tanpa gradient
    with torch.no_grad():
        outputs = model(**inputs)

    # Ambil [CLS] token dan mean pooling dari last hidden state
    last_hidden_state = outputs.last_hidden_state  # Shape: (1, seq_len, hidden_size)
    cls_token = last_hidden_state[:, 0, :]  # Ambil token [CLS], shape: (1, hidden_size)
    mean_pooling = last_hidden_state.mean(dim=1)  # Mean pooling, shape: (1, hidden_size)

    # Gabungkan keduanya jadi 1 vektor (shape: (1, 1536))
    combined = torch.cat((cls_token, mean_pooling), dim=1)

    # Kembalikan sebagai numpy array
    return combined.detach().cpu().numpy()

# Ekstraksi fitur dari kolom 'text_akhir' di clean_df
X_bert = np.array([encode(text) for text in clean_df['text_akhir']])

# Melihat hasilnya
print(X_bert.shape)  # Menampilkan ukuran vektor hasil ekstraksi fitur
print(X_bert)  # Menampilkan vektor hasil ekstraksi fitur

X_bert_reshaped = X_bert.squeeze(axis=1)

# Menambahkan hasil ekstraksi BERT ke dalam DataFrame
df_bert = pd.DataFrame(X_bert_reshaped)
df_bert.columns = [f'feature_{i}' for i in range(X_bert_reshaped.shape[1])]

# Gabungkan DataFrame asli dengan fitur BERT
bert_df_combined = pd.concat([clean_df, df_bert], axis=1)

# Menampilkan DataFrame yang telah digabungkan
print(bert_df_combined.head())

(11997, 1, 1536)
[[[ 0.3075569   0.24962315 -0.9208204  ... -1.1219256  -0.9290605
    1.0648259 ]]

 [[-1.7188449  -0.4695904  -1.0507127  ... -0.37796965 -1.1558845
    0.6552159 ]]

 [[ 0.06584504  0.27519247 -1.5227158  ... -1.1146705  -0.83636093
    0.8185787 ]]

 ...

 [[ 0.20543647  0.28220174 -1.3938137  ... -0.05330058  0.25038487
    1.177689  ]]

 [[-0.09421083  0.25189725  0.3470116  ... -1.0904464  -0.08978462
   -0.19376439]]

 [[-0.01185349 -0.7163656  -0.74748695 ... -0.6421587   0.1102407
   -0.25964674]]]
                       userName  score  \
0                   Bukan Robot    1.0   
1                    marsha Tea    1.0   
2                   Wardi Jafar    1.0   
3                  Nen _tarak92    1.0   
4  Alimurrosyid Budi Rohmansyah    1.0   

                                             content  \
0  aplikasi terjelek kebanyakan nonton iklan nya ...   
1  jangan kebanyakan iklan kasian yang ga berlang...   
2                       Gak ada film indonesia je

# **Percobaan Model**

#### **Random Forest TF-IDF dan BERT**

In [18]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

**Training Model Random Forest dengan Fitur TF-IDF**

In [19]:
from sklearn.preprocessing import LabelEncoder

# Inisialisasi LabelEncoder
le = LabelEncoder()

# Transform label (y) ke bentuk numerik
y_encoded = le.fit_transform(y)

In [20]:
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
import numpy as np

X_tfidf_train, X_tfidf_test, y_tfidf_train, y_tfidf_test = train_test_split(X_tfidf, y_encoded, test_size=0.2, random_state=42)

# Inisialisasi model Random Forest
rf_tfidf = RandomForestClassifier(n_estimators=1000, max_depth=50, min_samples_split=5, random_state=42)

# Melakukan cross-validation dengan 5 fold
cv_scores = cross_val_score(rf_tfidf, X_tfidf, y, cv=5, scoring='accuracy')

# Menampilkan hasil cross-validation
print(f"Cross-validation accuracy scores: {cv_scores}")
print(f"Mean cross-validation accuracy: {cv_scores.mean()}")

Cross-validation accuracy scores: [0.795      0.79916667 0.79866611 0.80408504 0.79283035]
Mean cross-validation accuracy: 0.797949631791024


In [21]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score

# Bagi data menjadi data latih dan data uji (80/20)
X_train, X_test, y_train, y_test = train_test_split(X_tfidf, y, test_size=0.2, random_state=42)

# Inisialisasi model Logistic Regression dengan parameter terbaik
best_model = LogisticRegression(C=4.8, max_iter=200, solver='lbfgs', multi_class='ovr', random_state=42)

# Latih model dengan data latih
best_model.fit(X_train, y_train)

# Prediksi hasil pada data uji
y_best_pred = best_model.predict(X_test)

# Evaluasi model dengan akurasi
print("Evaluasi Model Logistic Regression dengan Parameter Terbaik:")
print("Akurasi:", accuracy_score(y_test, y_best_pred))

Evaluasi Model Logistic Regression dengan Parameter Terbaik:
Akurasi: 0.86




In [22]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'C': [4.5, 4.8, 4.3],
    'solver': ['liblinear', 'lbfgs'],
    'max_iter': [200, 180, 220],
}

grid_search = GridSearchCV(LogisticRegression(multi_class='ovr', random_state=42), param_grid, cv=5)
grid_search.fit(X_tfidf, y)
print("Best parameters found: ", grid_search.best_params_)
print("Best cross-validation score: ", grid_search.best_score_)



Best parameters found:  {'C': 4.8, 'max_iter': 200, 'solver': 'lbfgs'}
Best cross-validation score:  0.8627999166319299




**Training Model Random Forest dengan Fitur BERT**

In [23]:
# Bagi data menjadi data latih dan data uji (80/20)
X_bert_train, X_bert_test, y_bert_train, y_bert_test = train_test_split(X_bert_reshaped, y, test_size=0.2, random_state=42)

# Inisialisasi model Random Forest
rf_bert = RandomForestClassifier(n_estimators=500, max_depth=50, min_samples_split=20, random_state=42)

# Melatih model dengan data latih
rf_bert.fit(X_bert_train, y_bert_train)

# Prediksi hasil pada data uji
y_bert_pred = rf_bert.predict(X_bert_test)

# Menampilkan hasil evaluasi
print("Evaluasi Model Random Forest dengan Fitur BERT:")
print("Akurasi:", accuracy_score(y_bert_test, y_bert_pred))

Evaluasi Model Random Forest dengan Fitur BERT:
Akurasi: 0.635


**Menggunakan Algoritma Deep Learning dengan TF-IDF**

In [24]:
# Bagi data menjadi data latih dan data uji (80/20)
X_dl_train, X_dl_test, y_dl_train, y_dl_test = train_test_split(X_tfidf, y, test_size=0.2, random_state=42)

**Encode kategori dengan label encoder**

In [25]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Inisialisasi encoder
label_encoder = LabelEncoder()

# Fit dan transform label ke bentuk numerik
y_dl_train = label_encoder.fit_transform(y_dl_train)
y_dl_test = label_encoder.transform(y_dl_test)

# Langkah 2: Ubah ke one-hot
y_dl_train_oh = to_categorical(y_dl_train)
y_dl_test_oh = to_categorical(y_dl_test)

In [26]:
X_dl_train = X_dl_train.toarray().astype('float32')
X_dl_test = X_dl_test.toarray().astype('float32')

In [27]:
from sklearn.decomposition import TruncatedSVD
# Misalnya ingin kurangi ke 100 fitur
svd = TruncatedSVD(n_components=400, random_state=42)

# Fit di training, transform di training dan testing
X_train_svd = svd.fit_transform(X_dl_train)
X_test_svd = svd.transform(X_dl_test)

In [28]:
import tensorflow as tf
from tensorflow.keras import regularizers

model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.0001), input_shape=(X_tfidf.shape[1],)),
    tf.keras.layers.BatchNormalization(),
    # tf.keras.layers.ReLU(),
    tf.keras.layers.LeakyReLU(alpha=0.1),
    tf.keras.layers.Dropout(0.4),

    tf.keras.layers.Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.0001)),
    tf.keras.layers.BatchNormalization(),
    # tf.keras.layers.ReLU(),
    tf.keras.layers.LeakyReLU(alpha=0.1),
    tf.keras.layers.Dropout(0.4),

    # tf.keras.layers.Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.0001)),
    # tf.keras.layers.LeakyReLU(alpha=0.1),
    tf.keras.layers.Dense(3, activation='softmax')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [29]:
print(type(X_dl_train))

<class 'numpy.ndarray'>


In [30]:
callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=15, mode='max', restore_best_weights=True)
]

model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0003), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# history3 = model.fit(X_dl_train, y_dl_train, epochs=100, batch_size=16, callbacks=callbacks, validation_data=(X_dl_test, y_dl_test))
history3 = model.fit(X_dl_train, y_dl_train, epochs=100, batch_size=128, validation_data=(X_dl_test, y_dl_test))
#epoch 20 = 0.8745
#epoch 30 = 0.9023
#epoch 40 dropout 0.5 = 0.8957 0.7844
#epoch 40 dropout 0.3 = 0.9237 0.7783
#epoch 40 dropout 0.3 L2 regu = 0.8974 0.7828
#epoch 40 dropout 0.3 L2 regu Smaller Dense = 0.8524 0.7964
#epoch 100 Early stoping Layer 128 64 = accuracy: 0.8541 0.7986
#epoch 100 Early stoping Layer 256 128 =

Epoch 1/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.3512 - loss: 1.5232 - val_accuracy: 0.3100 - val_loss: 1.1126
Epoch 2/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.4468 - loss: 1.2153 - val_accuracy: 0.3100 - val_loss: 1.1112
Epoch 3/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.5129 - loss: 1.0685 - val_accuracy: 0.3688 - val_loss: 1.1062
Epoch 4/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5631 - loss: 0.9526 - val_accuracy: 0.4950 - val_loss: 1.0915
Epoch 5/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.6174 - loss: 0.8462 - val_accuracy: 0.6617 - val_loss: 1.0457
Epoch 6/100
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.6514 - loss: 0.7999 - val_accuracy: 0.7121 - val_loss: 0.9547
Epoch 7/100
[1m75/75[0m [32m━━

**One Hot Encoding**

In [31]:
from tensorflow.keras.callbacks import ReduceLROnPlateau

callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=25, mode='max', restore_best_weights=True),
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=5, mode='max')
]

model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0005), loss='categorical_crossentropy', metrics=['accuracy'])

# history76 = model.fit(X_dl_train, y_dl_train_oh, epochs=100, batch_size=128, callbacks=callbacks, validation_data=(X_dl_test, y_dl_test_oh))
history76 = model.fit(X_dl_train, y_dl_train_oh, epochs=100, batch_size=32, validation_data=(X_dl_test, y_dl_test_oh))

Epoch 1/100
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.9407 - loss: 0.2158 - val_accuracy: 0.8308 - val_loss: 0.7515
Epoch 2/100
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9443 - loss: 0.2166 - val_accuracy: 0.8300 - val_loss: 0.7621
Epoch 3/100
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.9335 - loss: 0.2418 - val_accuracy: 0.8242 - val_loss: 0.7525
Epoch 4/100
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9361 - loss: 0.2228 - val_accuracy: 0.8238 - val_loss: 0.7781
Epoch 5/100
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9365 - loss: 0.2274 - val_accuracy: 0.8342 - val_loss: 0.7506
Epoch 6/100
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9341 - loss: 0.2413 - val_accuracy: 0.8279 - val_loss: 0.7284
Epoch 7/100
[1m300/30

# **Final Model**

**Setelah Fine tuning akhirnya mencapai 85% accuracy**

**Stratified K-Fold**

In [33]:
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from sklearn.decomposition import TruncatedSVD

# 1. Encode label
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)  # y: label asli
y_oh = to_categorical(y_encoded)

# 2. Konversi fitur ke dense jika masih dalam bentuk sparse matrix
X_array = X_tfidf.toarray().astype('float32')

# X_array = X_bert_reshaped.astype('float32')

# Misalnya ingin kurangi ke 100 fitur
svd = TruncatedSVD(n_components=400, random_state=42)

# Fit di training, transform di training dan testing
X_svd = svd.fit_transform(X_array)

# 3. Inisialisasi Stratified K-Fold
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

val_accuracies = []

# 4. Loop training per fold
for fold, (train_idx, val_idx) in enumerate(skf.split(X_array, y_encoded)):
    print(f"\nFold {fold+1}")

    # X_train_fold, X_val_fold = X_svd[train_idx], X_svd[val_idx]
    X_train_fold, X_val_fold = X_array[train_idx], X_array[val_idx]
    y_train_fold, y_val_fold = y_oh[train_idx], y_oh[val_idx]

    # 5. Bangun model baru untuk setiap fold
    model = tf.keras.models.Sequential([
    # Layer pertama dengan Regularisasi L2 dan BatchNormalization
      tf.keras.layers.Dense(128, activation=None, kernel_regularizer=regularizers.l2(0.0005), input_shape=(X_array.shape[1],), use_bias=False),
      # tf.keras.layers.Dense(128, activation=None, kernel_regularizer=regularizers.l2(0.0005), input_shape=(X_svd.shape[1],), use_bias=False),
      tf.keras.layers.BatchNormalization(),
      tf.keras.layers.LeakyReLU(alpha=0.1),
      tf.keras.layers.Dropout(0.2),

      # Layer kedua dengan Regularisasi L2 dan BatchNormalization
      tf.keras.layers.Dense(64, activation=None, kernel_regularizer=regularizers.l2(0.0005), use_bias=False),
      tf.keras.layers.BatchNormalization(),
      tf.keras.layers.LeakyReLU(alpha=0.1),
      tf.keras.layers.Dropout(0.2),

      # Layer ketiga dengan Regularisasi L2
      tf.keras.layers.Dense(32, activation=None, kernel_regularizer=regularizers.l2(0.0005), use_bias=False),
      tf.keras.layers.BatchNormalization(),
      tf.keras.layers.LeakyReLU(alpha=0.1),
      tf.keras.layers.Dropout(0.2),

      # Output Layer dengan softmax untuk multi-class classification
      tf.keras.layers.Dense(3, activation='softmax')
    ])

    # 6. Compile model
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=5e-4, decay=1e-5),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # 7. Callbacks
    callbacks = [
        tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=15, restore_best_weights=True),
        tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=5, mode='max'),
        ModelCheckpoint('best_model.h5', monitor='val_accuracy', save_best_only=True, mode='max')
    ]

    # 8. Train model
    history = model.fit(
        X_train_fold, y_train_fold,
        epochs=150,
        batch_size=32,
        validation_data=(X_val_fold, y_val_fold),
        callbacks=callbacks,
        verbose=1
    )

    # 9. Simpan akurasi
    val_acc = max(history.history['val_accuracy'])
    val_accuracies.append(val_acc)
    print(f"Fold {fold+1} - Best Val Accuracy: {val_acc:.4f}")

# 10. Rata-rata akurasi dari semua fold
print(f"\nAverage Validation Accuracy: {np.mean(val_accuracies):.4f}")


Fold 1
Epoch 1/150


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m294/300[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.4567 - loss: 1.2860



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 8ms/step - accuracy: 0.4592 - loss: 1.2815 - val_accuracy: 0.2408 - val_loss: 1.3257 - learning_rate: 5.0000e-04
Epoch 2/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.7591 - loss: 0.7600



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.7591 - loss: 0.7599 - val_accuracy: 0.8125 - val_loss: 0.6730 - learning_rate: 5.0000e-04
Epoch 3/150
[1m299/300[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.8252 - loss: 0.6294



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.8252 - loss: 0.6292 - val_accuracy: 0.8575 - val_loss: 0.5793 - learning_rate: 5.0000e-04
Epoch 4/150
[1m296/300[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.8584 - loss: 0.5410



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.8584 - loss: 0.5411 - val_accuracy: 0.8579 - val_loss: 0.5670 - learning_rate: 5.0000e-04
Epoch 5/150
[1m298/300[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.8727 - loss: 0.5123



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.8727 - loss: 0.5123 - val_accuracy: 0.8642 - val_loss: 0.5614 - learning_rate: 5.0000e-04
Epoch 6/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.8907 - loss: 0.4594 - val_accuracy: 0.8642 - val_loss: 0.5588 - learning_rate: 5.0000e-04
Epoch 7/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.9016 - loss: 0.4249 - val_accuracy: 0.8629 - val_loss: 0.5605 - learning_rate: 5.0000e-04
Epoch 8/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.9098 - loss: 0.4041 - val_accuracy: 0.8608 - val_loss: 0.5667 - learning_rate: 5.0000e-04
Epoch 9/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9134 - loss: 0.3904 - val_accuracy: 0.8512 - val_loss: 0.5882 - learning



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 10ms/step - accuracy: 0.5120 - loss: 1.2006 - val_accuracy: 0.2529 - val_loss: 1.2993 - learning_rate: 5.0000e-04
Epoch 2/150
[1m296/300[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.7664 - loss: 0.7568



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.7666 - loss: 0.7564 - val_accuracy: 0.8179 - val_loss: 0.6825 - learning_rate: 5.0000e-04
Epoch 3/150
[1m291/300[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.8377 - loss: 0.5973



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.8377 - loss: 0.5974 - val_accuracy: 0.8396 - val_loss: 0.5972 - learning_rate: 5.0000e-04
Epoch 4/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.8632 - loss: 0.5405 - val_accuracy: 0.8392 - val_loss: 0.5824 - learning_rate: 5.0000e-04
Epoch 5/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.8741 - loss: 0.4929 - val_accuracy: 0.8392 - val_loss: 0.5832 - learning_rate: 5.0000e-04
Epoch 6/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.8945 - loss: 0.4542



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.8945 - loss: 0.4542 - val_accuracy: 0.8479 - val_loss: 0.5774 - learning_rate: 5.0000e-04
Epoch 7/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.9058 - loss: 0.4104 - val_accuracy: 0.8458 - val_loss: 0.5885 - learning_rate: 5.0000e-04
Epoch 8/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.9053 - loss: 0.4140 - val_accuracy: 0.8383 - val_loss: 0.5938 - learning_rate: 5.0000e-04
Epoch 9/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9178 - loss: 0.3788 - val_accuracy: 0.8396 - val_loss: 0.6039 - learning_rate: 5.0000e-04
Epoch 10/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.9211 - loss: 0.3639 - val_accuracy: 0.8388 - val_loss: 0.6192 - learni



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.5092 - loss: 1.2136 - val_accuracy: 0.4652 - val_loss: 1.1076 - learning_rate: 5.0000e-04
Epoch 2/150
[1m297/300[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.7565 - loss: 0.7511



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.7567 - loss: 0.7508 - val_accuracy: 0.8158 - val_loss: 0.6596 - learning_rate: 5.0000e-04
Epoch 3/150
[1m299/300[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 8ms/step - accuracy: 0.8330 - loss: 0.6079



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.8330 - loss: 0.6079 - val_accuracy: 0.8358 - val_loss: 0.5955 - learning_rate: 5.0000e-04
Epoch 4/150
[1m299/300[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.8564 - loss: 0.5501



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.8565 - loss: 0.5501 - val_accuracy: 0.8441 - val_loss: 0.5857 - learning_rate: 5.0000e-04
Epoch 5/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.8840 - loss: 0.4879 - val_accuracy: 0.8420 - val_loss: 0.5860 - learning_rate: 5.0000e-04
Epoch 6/150
[1m291/300[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.8868 - loss: 0.4692



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.8868 - loss: 0.4692 - val_accuracy: 0.8483 - val_loss: 0.5921 - learning_rate: 5.0000e-04
Epoch 7/150
[1m294/300[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.9048 - loss: 0.4253



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.9046 - loss: 0.4256 - val_accuracy: 0.8512 - val_loss: 0.5897 - learning_rate: 5.0000e-04
Epoch 8/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.9104 - loss: 0.4014 - val_accuracy: 0.8470 - val_loss: 0.5931 - learning_rate: 5.0000e-04
Epoch 9/150
[1m292/300[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.9227 - loss: 0.3759



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.9226 - loss: 0.3761 - val_accuracy: 0.8529 - val_loss: 0.5999 - learning_rate: 5.0000e-04
Epoch 10/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9109 - loss: 0.3962 - val_accuracy: 0.8495 - val_loss: 0.6219 - learning_rate: 5.0000e-04
Epoch 11/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9221 - loss: 0.3619 - val_accuracy: 0.8433 - val_loss: 0.6373 - learning_rate: 5.0000e-04
Epoch 12/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9243 - loss: 0.3484 - val_accuracy: 0.8474 - val_loss: 0.6415 - learning_rate: 5.0000e-04
Epoch 13/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.9328 - loss: 0.3258 - val_accuracy: 0.8399 - val_loss: 0.6646 - lear



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 8ms/step - accuracy: 0.4654 - loss: 1.3306 - val_accuracy: 0.3831 - val_loss: 1.1529 - learning_rate: 5.0000e-04
Epoch 2/150
[1m296/300[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - accuracy: 0.7633 - loss: 0.7590



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.7635 - loss: 0.7586 - val_accuracy: 0.8295 - val_loss: 0.6501 - learning_rate: 5.0000e-04
Epoch 3/150
[1m292/300[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.8313 - loss: 0.6112



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.8313 - loss: 0.6112 - val_accuracy: 0.8416 - val_loss: 0.5904 - learning_rate: 5.0000e-04
Epoch 4/150
[1m296/300[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.8614 - loss: 0.5452



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.8613 - loss: 0.5454 - val_accuracy: 0.8433 - val_loss: 0.5878 - learning_rate: 5.0000e-04
Epoch 5/150
[1m294/300[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.8775 - loss: 0.5009



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.8774 - loss: 0.5011 - val_accuracy: 0.8529 - val_loss: 0.5750 - learning_rate: 5.0000e-04
Epoch 6/150
[1m296/300[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 7ms/step - accuracy: 0.8967 - loss: 0.4504



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.8966 - loss: 0.4507 - val_accuracy: 0.8554 - val_loss: 0.5717 - learning_rate: 5.0000e-04
Epoch 7/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.9004 - loss: 0.4340 - val_accuracy: 0.8554 - val_loss: 0.5710 - learning_rate: 5.0000e-04
Epoch 8/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9096 - loss: 0.4037 - val_accuracy: 0.8499 - val_loss: 0.5875 - learning_rate: 5.0000e-04
Epoch 9/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.9164 - loss: 0.3869 - val_accuracy: 0.8541 - val_loss: 0.5973 - learning_rate: 5.0000e-04
Epoch 10/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.9181 - loss: 0.3814 - val_accuracy: 0.8524 - val_loss: 0.5974 - learnin



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.9341 - loss: 0.3369 - val_accuracy: 0.8587 - val_loss: 0.6073 - learning_rate: 2.5000e-04
Epoch 13/150
[1m294/300[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.9358 - loss: 0.3278



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9358 - loss: 0.3277 - val_accuracy: 0.8604 - val_loss: 0.6125 - learning_rate: 2.5000e-04
Epoch 14/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.9420 - loss: 0.2990 - val_accuracy: 0.8587 - val_loss: 0.6358 - learning_rate: 2.5000e-04
Epoch 15/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9497 - loss: 0.2856 - val_accuracy: 0.8541 - val_loss: 0.6484 - learning_rate: 2.5000e-04
Epoch 16/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9400 - loss: 0.2977 - val_accuracy: 0.8520 - val_loss: 0.6622 - learning_rate: 2.5000e-04
Epoch 17/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.9470 - loss: 0.2828 - val_accuracy: 0.8499 - val_loss: 0.6658 - lear



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.4995 - loss: 1.2762 - val_accuracy: 0.3110 - val_loss: 1.2028 - learning_rate: 5.0000e-04
Epoch 2/150
[1m293/300[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 6ms/step - accuracy: 0.7539 - loss: 0.7732



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.7542 - loss: 0.7724 - val_accuracy: 0.8020 - val_loss: 0.6910 - learning_rate: 5.0000e-04
Epoch 3/150
[1m297/300[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 8ms/step - accuracy: 0.8301 - loss: 0.6126



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.8301 - loss: 0.6126 - val_accuracy: 0.8399 - val_loss: 0.6102 - learning_rate: 5.0000e-04
Epoch 4/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.8617 - loss: 0.5464 - val_accuracy: 0.8395 - val_loss: 0.5970 - learning_rate: 5.0000e-04
Epoch 5/150
[1m297/300[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.8799 - loss: 0.5037



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.8799 - loss: 0.5037 - val_accuracy: 0.8429 - val_loss: 0.5971 - learning_rate: 5.0000e-04
Epoch 6/150
[1m296/300[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.8971 - loss: 0.4546



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.8969 - loss: 0.4549 - val_accuracy: 0.8454 - val_loss: 0.6012 - learning_rate: 5.0000e-04
Epoch 7/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.9002 - loss: 0.4440 - val_accuracy: 0.8408 - val_loss: 0.6021 - learning_rate: 5.0000e-04
Epoch 8/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.9066 - loss: 0.4106 - val_accuracy: 0.8429 - val_loss: 0.6092 - learning_rate: 5.0000e-04
Epoch 9/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9187 - loss: 0.3809 - val_accuracy: 0.8404 - val_loss: 0.6122 - learning_rate: 5.0000e-04
Epoch 10/150
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9211 - loss: 0.3709 - val_accuracy: 0.8378 - val_loss: 0.6386 - learnin