In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.utils import set_random_seed
from tensorflow.keras.models import load_model




In [10]:
# 1. Load the dataset from CSV.
# Later cells read the 'story' (text) and 'label' columns from this frame.
df = pd.read_csv(
    "sentiment_budaya_dataset.csv",
)

In [11]:
# 2. Text preprocessing.
# Build the word index from the 'story' column; num_words caps the vocabulary
# used when converting texts to sequences, and words outside it are mapped to
# the "<OOV>" token.
tokenizer = Tokenizer(
    num_words=5000,
    oov_token="<OOV>",
)
tokenizer.fit_on_texts(df["story"])

In [12]:
# Convert every story to a list of word indices, then pad/truncate each list
# to a fixed length so the result is a rectangular matrix.
sequences = tokenizer.texts_to_sequences(df["story"])
X = pad_sequences(
    sequences,
    maxlen=100,  # Tunable: number of tokens kept per story
)

In [13]:
# 3. One-hot encode the labels.
# NOTE(review): to_categorical expects integer class ids below num_classes,
# so this presumably relies on 'label' holding 0/1 -- confirm against the CSV.
y = to_categorical(
    df["label"],
    num_classes=2,
)

In [14]:
# 4. Hold out 20% of the samples for testing; the fixed random_state keeps
# the split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [16]:
# 5. Build the Keras model.
# Seed Python, NumPy and TensorFlow before any layer is created so that
# weight initialisation and training shuffling are repeatable across kernel
# restarts (otherwise the reported accuracy changes from run to run).
set_random_seed(42)

model = Sequential([
    # Size the embedding from the fitted tokenizer and the padded matrix
    # instead of repeating literals, so the model cannot drift out of sync
    # with the preprocessing cells.
    Embedding(
        input_dim=tokenizer.num_words,
        output_dim=16,
        input_length=X.shape[1],
    ),
    # Average the token embeddings into a single vector per story.
    GlobalAveragePooling1D(),
    Dense(16, activation='relu'),
    Dense(2, activation='softmax')  # 2 classes: positive & negative
])
# categorical_crossentropy matches the one-hot encoded targets in `y`.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])





In [18]:
# 6. Train the model.
# 10% of the training split is set aside by Keras for per-epoch validation.
# The call is left as the cell's last expression, so the History object is
# displayed as the cell output.
model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.1,
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x2acd5e8cdc0>

In [19]:
# 7. Evaluate on the held-out test split.
# evaluate() returns the loss followed by the compiled metrics (accuracy).
loss, acc = model.evaluate(
    X_test,
    y_test,
    verbose=0,
)
print(f"✅ Akurasi: {acc:.2f}")

✅ Akurasi: 1.00


In [20]:
# 8. Persist the trained model as an HDF5 (.h5) file.
# predict_sentiment() loads this exact path, so keep the two in sync.
model.save(
    "sentiment_model.h5",
)
print("✅ Model disimpan sebagai 'sentiment_model.h5'")

✅ Model disimpan sebagai 'sentiment_model.h5'


  saving_api.save_model(


In [24]:
def predict_sentiment(text, model=None, maxlen=100):
    """Predict the sentiment of one text and print the class probabilities.

    The text is converted with the notebook-level ``tokenizer`` and padded
    the same way as the training data before being passed to the model.

    Args:
        text: The raw text (a single string) to classify.
        model: Optional already-loaded Keras model. When omitted, the model
            is loaded from "sentiment_model.h5", as before. Passing a model
            avoids re-reading the file from disk on every call.
        maxlen: Length the token sequence is padded/truncated to. It must
            match the length used when the model was trained (default 100).

    Returns:
        "Positif" if output index 1 has the highest probability, otherwise
        "Negatif".
    """
    # Loading is only a fallback: a caller that predicts repeatedly can hand
    # in a loaded model and skip the disk read on each call.
    if model is None:
        model = load_model("sentiment_model.h5")

    seq = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(seq, maxlen=maxlen)
    # The batch holds a single sample; take its probability vector.
    probas = model.predict(padded)[0]

    # Output index 0 is reported as negative, index 1 as positive.
    percent_negatif = probas[0] * 100
    percent_positif = probas[1] * 100

    label = probas.argmax()
    sentiment = "Positif" if label == 1 else "Negatif"

    print(f"Negatif: {percent_negatif:.2f}%, Positif: {percent_positif:.2f}%")
    return sentiment


In [27]:
# Try the classifier on a single hand-written example sentence.
story_input = (
    "nuansanya sangat bagus sekali saya sangat suka sekali dengan budaya yang ada disini."
)
hasil = predict_sentiment(story_input)
print(f"Prediksi Sentimen: {hasil}")


Negatif: 23.57%, Positif: 76.43%
Prediksi Sentimen: Positif
