<a href="https://colab.research.google.com/github/Siera-Collab/Tugas-KI-Math-UI-Siera-Barokatillah/blob/main/EndTermTask_Siera_Barokatillah_2006568714.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Import Module**

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.models import Sequential
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import precision_score, recall_score, f1_score

**# 1. Deskripsi Dataset**

In [None]:
print("Dataset: IMDB Movie Reviews\n")
vocab_size = 10000
max_length = 100
print(f"Vocabulary Size: {vocab_size}")
print(f"Maximum Sequence Length: {max_length}\n")

Dataset: IMDB Movie Reviews

Vocabulary Size: 10000
Maximum Sequence Length: 100



**# 2. Preprocessing**

In [None]:
print("Data Preprocessing\n")
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)
x_train = pad_sequences(x_train, maxlen=max_length, padding='post', truncating='post')
x_test = pad_sequences(x_test, maxlen=max_length, padding='post', truncating='post')
print(f"Training Data Shape: {x_train.shape}")
print(f"Test Data Shape: {x_test.shape}\n")

Data Preprocessing

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Training Data Shape: (25000, 100)
Test Data Shape: (25000, 100)



**# 3. Konfigurasi Model**

In [None]:
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=128, input_length=max_length),
    Bidirectional(LSTM(64, return_sequences=True)),
    Dropout(0.5),
    Bidirectional(LSTM(32)),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])
model.build(input_shape=(None, max_length))
model.summary()


**# 4. Pelatihan Model**

In [None]:
print("\nModel Training\n")
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(x_train, y_train, epochs=5, batch_size=64, validation_split=0.2)



Model Training

Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 407ms/step - accuracy: 0.6440 - loss: 0.5982 - val_accuracy: 0.8280 - val_loss: 0.3876
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m153s[0m 489ms/step - accuracy: 0.8811 - loss: 0.2986 - val_accuracy: 0.8294 - val_loss: 0.4369
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m177s[0m 409ms/step - accuracy: 0.9355 - loss: 0.1788 - val_accuracy: 0.8106 - val_loss: 0.4715
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 404ms/step - accuracy: 0.9673 - loss: 0.0965 - val_accuracy: 0.8104 - val_loss: 0.6751
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 425ms/step - accuracy: 0.9849 - loss: 0.0448 - val_accuracy: 0.7986 - val_loss: 0.8347


**# 5. Evaluasi Model**

In [None]:
print("\nModel Evaluation\n")
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)
print(f"Test Accuracy: {test_acc:.2f}, Test Loss: {test_loss:.2f}\n")


Model Evaluation

782/782 - 43s - 55ms/step - accuracy: 0.7760 - loss: 0.9467
Test Accuracy: 0.78, Test Loss: 0.95



**# 6. Analisis Hasil**

In [None]:
print("Analysis Metrics\n")
y_pred = (model.predict(x_test) > 0.5).astype("int32")
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")

Analysis Metrics

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 63ms/step
Precision: 0.81
Recall: 0.72
F1-Score: 0.76


**Upgrade**

Perbarui Arsitektur dengan Batch Normalization

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, BatchNormalization, Dropout, Dense

# Tentukan parameter model
vocab_size = 10000    # Ganti dengan ukuran kosakata sebenarnya
max_length = 100      # Ganti dengan panjang input sebenarnya

# Definisikan model
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=128, input_length=max_length),
    Bidirectional(LSTM(64, return_sequences=True)),
    BatchNormalization(),  # Normalisasi batch
    Dropout(0.4),          # Dropout moderat
    Bidirectional(LSTM(32)),
    Dense(64, activation='relu'),
    BatchNormalization(),  # Normalisasi batch
    Dropout(0.4),          # Dropout moderat
    Dense(1, activation='sigmoid')  # Output untuk masalah biner
])

# Ringkasan model
model.summary()



In [None]:
!pip install keras-tuner -q


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/129.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[?25h

Tambahkan Hyperparameter Tuning dengan Keras Tuner

In [None]:
import keras_tuner as kt


In [None]:
import keras_tuner as kt

def build_model(hp):
    model = Sequential([
        Embedding(vocab_size, hp.Int('embedding_dim', 64, 256, step=64), input_length=max_length),
        Bidirectional(LSTM(hp.Int('lstm_units', 32, 128, step=32), return_sequences=True)),
        BatchNormalization(),
        Dropout(hp.Float('dropout_rate', 0.2, 0.5, step=0.1)),
        Bidirectional(LSTM(hp.Int('lstm_units_2', 16, 64, step=16))),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(hp.Float('dropout_dense', 0.2, 0.5, step=0.1)),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Tuning Model
tuner = kt.Hyperband(
    build_model,
    objective='val_accuracy',
    max_epochs=10,
    factor=3,
    directory='hyperparameter_tuning',
    project_name='imdb_sentiment_analysis'
)

tuner.search(x_train, y_train, validation_split=0.2, epochs=10)

# Ambil model terbaik
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
model = tuner.hypermodel.build(best_hps)

# Tampilkan hasil hyperparameter terbaik
print(f"Best embedding_dim: {best_hps.get('embedding_dim')}")
print(f"Best lstm_units: {best_hps.get('lstm_units')}")
print(f"Best dropout_rate: {best_hps.get('dropout_rate')}")


Trial 7 Complete [00h 05m 06s]
val_accuracy: 0.795799970626831

Best val_accuracy So Far: 0.828000009059906
Total elapsed time: 01h 05m 22s

Search: Running Trial #8

Value             |Best Value So Far |Hyperparameter
64                |192               |embedding_dim
128               |96                |lstm_units
0.2               |0.3               |dropout_rate
16                |64                |lstm_units_2
0.4               |0.2               |dropout_dense
2                 |2                 |tuner/epochs
0                 |0                 |tuner/initial_epoch
2                 |2                 |tuner/bracket
0                 |0                 |tuner/round

Epoch 1/2
[1m488/625[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m47s[0m 346ms/step - accuracy: 0.5986 - loss: 0.6974

KeyboardInterrupt: 

Lakukan Pelatihan Model dengan Hasil Tuning

In [None]:
history = model.fit(x_train, y_train, epochs=10, batch_size=64, validation_split=0.2)

# Evaluasi Model
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)
print(f"Test Accuracy: {test_acc:.2f}, Test Loss: {test_loss:.2f}")


Epoch 1/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m134s[0m 401ms/step - accuracy: 0.6253 - loss: 0.7055 - val_accuracy: 0.6650 - val_loss: 0.5712
Epoch 2/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 415ms/step - accuracy: 0.8672 - loss: 0.3264 - val_accuracy: 0.8082 - val_loss: 0.4437
Epoch 3/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 409ms/step - accuracy: 0.9229 - loss: 0.2060 - val_accuracy: 0.7846 - val_loss: 0.5866
Epoch 4/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 400ms/step - accuracy: 0.9575 - loss: 0.1247 - val_accuracy: 0.7958 - val_loss: 0.5859
Epoch 5/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 403ms/step - accuracy: 0.9716 - loss: 0.0844 - val_accuracy: 0.7734 - val_loss: 0.8649
Epoch 6/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m131s[0m 419ms/step - accuracy: 0.9782 - loss: 0.0630 - val_accuracy: 0.7838 - val_loss: 0.8202
Epoc