In [1]:
import numpy as np
from tensorflow.keras.layers import Conv1D, Dense, Dropout, Embedding, GlobalMaxPooling1D, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import set_random_seed

# Toy sentiment dataset. Each Indonesian sentence is stored next to its label
# (1 = positive, 0 = negative) so that a text and its label cannot drift apart.
labeled_examples = [
    ("Saya suka cuaca hari ini.", 1),
    ("Mobil itu rusak.", 0),
    ("Hari ini sangat cerah dan menyenangkan.", 1),
    ("Saya benci ketika hujan.", 0),
    ("Kendaraan itu berfungsi dengan baik.", 1),
    ("Cuaca hari ini buruk sekali.", 0),
]

# Unzip the pairs into the two parallel containers the rest of the notebook uses.
sentences = [text for text, _ in labeled_examples]
labels = np.array([label for _, label in labeled_examples])

# Tokenization: learn the vocabulary from the training sentences, then encode
# every sentence as a list of integer word indices.
tokenizer = Tokenizer(num_words=1000)
tokenizer.fit_on_texts(sentences)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(sentences)

# Padding: bring every sequence to a fixed length of `maxlen`. With the default
# settings the zeros are added at the front, as the printed matrix shows.
maxlen = 20
data = pad_sequences(sequences, maxlen=maxlen)

# Show the padded matrix and the word -> index mapping, one item per line.
print("Data setelah padding:", data, "Indeks kata:", word_index, sep="\n")

Data setelah padding:
[[ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  3  6  4  1  2]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  7  5  8]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  2  9 10 11 12]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  3 13 14 15]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 16  5 17 18 19]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  4  1  2 20 21]]
Indeks kata:
{'hari': 1, 'ini': 2, 'saya': 3, 'cuaca': 4, 'itu': 5, 'suka': 6, 'mobil': 7, 'rusak': 8, 'sangat': 9, 'cerah': 10, 'dan': 11, 'menyenangkan': 12, 'benci': 13, 'ketika': 14, 'hujan': 15, 'kendaraan': 16, 'berfungsi': 17, 'dengan': 18, 'baik': 19, 'buruk': 20, 'sekali': 21}


In [2]:
# Seed the Python, NumPy and TensorFlow random generators before any weights are
# created, so that a fresh "Restart & Run All" starts from the same initial weights.
SEED = 42
set_random_seed(SEED)

vocab_size = len(word_index) + 1  # +1 because index 0 is reserved for the padding token

# Small 1D-CNN binary classifier:
# embedding -> convolution over word windows -> max over time -> dense head.
model = Sequential([
    # An explicit Input layer replaces the deprecated `input_length` argument of
    # Embedding. The model is still built at construction time, so model.summary()
    # can report output shapes and parameter counts.
    Input(shape=(maxlen,)),
    Embedding(input_dim=vocab_size, output_dim=50),
    Conv1D(filters=128, kernel_size=5, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(10, activation='relu'),
    Dropout(0.5),
    # A single sigmoid unit outputs the probability of the positive class (label 1).
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 20, 50)            1100      
                                                                 
 conv1d (Conv1D)             (None, 16, 128)           32128     
                                                                 
 global_max_pooling1d (Glob  (None, 128)               0         
 alMaxPooling1D)                                                 
                                                                 
 dense (Dense)               (None, 10)                1290      
                                                                 
 dropout (Dropout)           (None, 10)                0         
                                                                 
 dense_1 (Dense)             (None, 1)                 11        
                                                        

In [3]:
# Train on the full toy dataset for 10 epochs. The call is left as the last
# expression, so the returned History object is displayed as the cell output.
model.fit(
    x=data,
    y=labels,
    epochs=10,
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7b0d9be9a350>

In [4]:
# New example sentences for prediction.
test_sentences = ["Cuaca hari ini bagus.", "Mobil itu rusak parah."]

# NOTE: "bagus" and "parah" do not occur in the training sentences. The tokenizer
# was created without an oov_token, so such unseen words are dropped from the
# encoded sequences and do not influence the prediction.
test_sequences = tokenizer.texts_to_sequences(test_sentences)
X_test = pad_sequences(test_sequences, maxlen=maxlen)

# One row per sentence, each holding the single sigmoid output of the model.
predictions = model.predict(X_test)

# Print the predicted class for every sentence. The rows are flattened to scalar
# probabilities so the 0.5 threshold is applied to a plain number instead of
# relying on the truth value of a one-element array.
for sentence, probability in zip(test_sentences, predictions.ravel()):
    print("Kalimat:", sentence)
    print("Prediksi: Positif" if probability > 0.5 else "Prediksi: Negatif")
    print()

Kalimat: Cuaca hari ini bagus.
Prediksi: Positif

Kalimat: Mobil itu rusak parah.
Prediksi: Positif

