In [1]:
# importing
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Bidirectional, Dropout, Dense, LSTM
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt
from utils import load_nusax_data
from model import SimpleRNNModel
from layers import RNN as CustomRNN, BiRNN as CustomBiRNN

In [2]:
# ----- Fixed Hyperparameters -----
MAX_TOKENS    = 20000
SEQ_LENGTH    = 100
EMBED_DIM     = 128
DROPOUT_RATE  = 0.5
BATCH_SIZE    = 50
EPOCHS        = 15

In [3]:
# ----- Load and preprocess data -----
tok_train, y_train, tok_val, y_val, tok_test, y_test, vocab_size, num_classes = \
    load_nusax_data(MAX_TOKENS, SEQ_LENGTH)

os.makedirs('results/weights', exist_ok=True)
os.makedirs('results/plots', exist_ok=True)
results_summary = []

In [4]:
# ----- Needed Function Definitions -----
def build_keras_model(num_rnn_layers, rnn_units, bidirectional, rnn_type='SimpleRNN', dropout_rate=DROPOUT_RATE):
    keras_model = Sequential()
    keras_model.add(Embedding(
        input_dim=vocab_size,
        output_dim=EMBED_DIM,
        input_length=SEQ_LENGTH,
        name='embedding'
    ))

    for i in range(num_rnn_layers):
        is_last_rnn = (i == num_rnn_layers - 1)
        return_sequences = not is_last_rnn
        
        layer_name_prefix = f"{'bi' if bidirectional else ''}{rnn_type.lower()}_{i}"

        if rnn_type == 'SimpleRNN':
            rnn_layer_constructor = SimpleRNN(rnn_units, return_sequences=return_sequences, name=f"{layer_name_prefix}_simplernn")
        elif rnn_type == 'LSTM':
            rnn_layer_constructor = LSTM(rnn_units, return_sequences=return_sequences, name=f"{layer_name_prefix}_lstm")
        else:
            raise ValueError(f"Unsupported RNN type: {rnn_type}")

        if bidirectional:
            keras_model.add(Bidirectional(rnn_layer_constructor, name=f"{layer_name_prefix}_bidir"))
        else:
            keras_model.add(rnn_layer_constructor)
            
    keras_model.add(Dropout(dropout_rate, name='dropout'))
    keras_model.add(Dense(num_classes, activation='softmax', name='output'))

    keras_model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return keras_model

def plot_history(history, model_name, experiment_name):
    plt.figure(figsize=(12, 4))
    plt.subplot(1, 2, 1)
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title(f'Loss - {model_name}')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(history.history['accuracy'], label='Training Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title(f'Accuracy - {model_name}')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    
    plot_filename = f"results/plots/{experiment_name}_{model_name}_history.png"
    plt.savefig(plot_filename)
    print(f"Saved plot to {plot_filename}")
    plt.close()

In [5]:
# --- Experiment Configurations ---
experiments = []

# 1. beda layer RNN
base_rnn_units = 32
base_bidirectional = True
for num_layers in [1, 2, 3]:
    experiments.append({
        "name": f"NumLayers_{num_layers}_Units_{base_rnn_units}_Dir_{'Bi' if base_bidirectional else 'Uni'}",
        "num_rnn_layers": num_layers,
        "rnn_units": base_rnn_units,
        "bidirectional": base_bidirectional
    })

# 2. beda cell RNN per layer
base_num_layers = 2
base_bidirectional = True
for rnn_units in [16, 32, 64]:
    experiments.append({
        "name": f"NumLayers_{base_num_layers}_Units_{rnn_units}_Dir_{'Bi' if base_bidirectional else 'Uni'}",
        "num_rnn_layers": base_num_layers,
        "rnn_units": rnn_units,
        "bidirectional": base_bidirectional
    })

# 3. beda RNN berdasarkan arah
base_num_layers = 2
base_rnn_units = 32
for bidirectional_flag in [False, True]:
    experiments.append({
        "name": f"NumLayers_{base_num_layers}_Units_{base_rnn_units}_Dir_{'Bi' if bidirectional_flag else 'Uni'}",
        "num_rnn_layers": base_num_layers,
        "rnn_units": base_rnn_units,
        "bidirectional": bidirectional_flag
    })

In [6]:
# --- Run Experiments ---
for i, exp_config in enumerate(experiments):
    print(f"\n--- Running Experiment {i+1}/{len(experiments)}: {exp_config['name']} ---")
    
    model_name = exp_config['name']
    num_rnn_layers_exp = exp_config['num_rnn_layers']
    rnn_units_exp = exp_config['rnn_units']
    bidirectional_exp = exp_config['bidirectional']

    # ----- bikin model keras -----
    keras_model = build_keras_model(
        num_rnn_layers=num_rnn_layers_exp,
        rnn_units=rnn_units_exp,
        bidirectional=bidirectional_exp,
        rnn_type='SimpleRNN'
    )
    keras_model.summary()

    # ----- Training -----
    print(f"Training Keras model: {model_name}")
    history = keras_model.fit(
        tok_train, y_train,
        validation_data=(tok_val, y_val),
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        verbose=2
    )
    weights_path = f'results/weights/keras_rnn_{model_name}.weights.h5'
    keras_model.save_weights(weights_path)
    print(f"Saved Keras weights to {weights_path}")

    plot_history(history, model_name, "experiment")

    # ----- Evaluation on test set (Keras) -----
    y_pred_prob_keras = keras_model.predict(tok_test, batch_size=BATCH_SIZE)
    y_pred_keras = np.argmax(y_pred_prob_keras, axis=1)
    f1_test_keras = f1_score(y_test, y_pred_keras, average='macro')
    print(f"[Keras - {model_name}] Test Macro F1-score: {f1_test_keras:.4f}")

    f1_test_custom = None

    # ----- Custom Model Comparison for THIS experiment -----
    print(f"\n--- Comparing with Custom Model for {model_name} ---")

    if num_rnn_layers_exp > 0:
        custom_return_sequences = [True] * (num_rnn_layers_exp - 1) + [False]
    else:
        custom_return_sequences = []

    try:
        custom_model = SimpleRNNModel(
            vocab_size=vocab_size,
            embed_dim=EMBED_DIM,
            hidden_dim=rnn_units_exp,
            num_classes=num_classes,
            dropout_rate=DROPOUT_RATE,
            num_layers=num_rnn_layers_exp,
            bidirectional=bidirectional_exp,
            return_sequences_list=custom_return_sequences
        )

        keras_weights_list = keras_model.get_weights()
        custom_model.load_keras_weights(keras_weights_list)
        print("Loaded Keras weights into custom model.")

        custom_model.set_training_mode(False)

        y_pred_prob_custom_list = []
        num_test_samples = tok_test.shape[0]
        for j_batch in range(0, num_test_samples, BATCH_SIZE):
            tok_test_batch = tok_test[j_batch:j_batch + BATCH_SIZE]
            probs_batch_custom = custom_model.forward(tok_test_batch, training=False)
            y_pred_prob_custom_list.append(probs_batch_custom)
        
        y_pred_prob_custom = np.concatenate(y_pred_prob_custom_list, axis=0)
        y_pred_custom = np.argmax(y_pred_prob_custom, axis=1)
        f1_test_custom = f1_score(y_test, y_pred_custom, average='macro')
        print(f"[Custom - {model_name}] Test Macro F1-score: {f1_test_custom:.4f}")
        
        if not np.isclose(f1_test_keras, f1_test_custom, atol=1e-3):
            print(f"Warning: Keras F1 ({f1_test_keras:.4f}) and Custom F1 ({f1_test_custom:.4f}) scores differ significantly for {model_name}.")

    except Exception as e:
        print(f"Error during custom model comparison for {model_name}: {e}")
        f1_test_custom = None

    results_summary.append({
        "name": model_name,
        "num_rnn_layers": num_rnn_layers_exp,
        "rnn_units": rnn_units_exp,
        "bidirectional": bidirectional_exp,
        "f1_keras": f1_test_keras,
        "f1_custom": f1_test_custom
    })


--- Running Experiment 1/8: NumLayers_1_Units_32_Dir_Bi ---




Training Keras model: NumLayers_1_Units_32_Dir_Bi
Epoch 1/15
10/10 - 2s - 240ms/step - accuracy: 0.3400 - loss: 1.1068 - val_accuracy: 0.3400 - val_loss: 1.0525
Epoch 2/15
10/10 - 0s - 37ms/step - accuracy: 0.6260 - loss: 0.9228 - val_accuracy: 0.3800 - val_loss: 1.0361
Epoch 3/15
10/10 - 0s - 37ms/step - accuracy: 0.8220 - loss: 0.6992 - val_accuracy: 0.4200 - val_loss: 1.0169
Epoch 4/15
10/10 - 0s - 41ms/step - accuracy: 0.9240 - loss: 0.4692 - val_accuracy: 0.3800 - val_loss: 1.0335
Epoch 5/15
10/10 - 0s - 37ms/step - accuracy: 0.9680 - loss: 0.3021 - val_accuracy: 0.4100 - val_loss: 1.0666
Epoch 6/15
10/10 - 0s - 36ms/step - accuracy: 0.9900 - loss: 0.1937 - val_accuracy: 0.4200 - val_loss: 1.1259
Epoch 7/15
10/10 - 0s - 37ms/step - accuracy: 0.9900 - loss: 0.1366 - val_accuracy: 0.4500 - val_loss: 1.1202
Epoch 8/15
10/10 - 0s - 37ms/step - accuracy: 0.9980 - loss: 0.0840 - val_accuracy: 0.4000 - val_loss: 1.1844
Epoch 9/15
10/10 - 0s - 36ms/step - accuracy: 1.0000 - loss: 0.0603 -



Training Keras model: NumLayers_2_Units_32_Dir_Bi
Epoch 1/15
10/10 - 4s - 401ms/step - accuracy: 0.3760 - loss: 1.1348 - val_accuracy: 0.4800 - val_loss: 1.0469
Epoch 2/15
10/10 - 0s - 45ms/step - accuracy: 0.6500 - loss: 0.8199 - val_accuracy: 0.5400 - val_loss: 0.9783
Epoch 3/15
10/10 - 0s - 46ms/step - accuracy: 0.8940 - loss: 0.4981 - val_accuracy: 0.5600 - val_loss: 0.9119
Epoch 4/15
10/10 - 0s - 45ms/step - accuracy: 0.9540 - loss: 0.2939 - val_accuracy: 0.5300 - val_loss: 0.9434
Epoch 5/15
10/10 - 0s - 46ms/step - accuracy: 0.9900 - loss: 0.1646 - val_accuracy: 0.4800 - val_loss: 0.9982
Epoch 6/15
10/10 - 0s - 46ms/step - accuracy: 0.9940 - loss: 0.0904 - val_accuracy: 0.5200 - val_loss: 1.0154
Epoch 7/15
10/10 - 0s - 46ms/step - accuracy: 1.0000 - loss: 0.0548 - val_accuracy: 0.5100 - val_loss: 1.0819
Epoch 8/15
10/10 - 1s - 51ms/step - accuracy: 0.9980 - loss: 0.0349 - val_accuracy: 0.5300 - val_loss: 1.1081
Epoch 9/15
10/10 - 0s - 48ms/step - accuracy: 1.0000 - loss: 0.0330 -



Training Keras model: NumLayers_3_Units_32_Dir_Bi
Epoch 1/15
10/10 - 6s - 566ms/step - accuracy: 0.3820 - loss: 1.2321 - val_accuracy: 0.4100 - val_loss: 1.1035
Epoch 2/15
10/10 - 1s - 63ms/step - accuracy: 0.6180 - loss: 0.8570 - val_accuracy: 0.5000 - val_loss: 1.0340
Epoch 3/15
10/10 - 1s - 60ms/step - accuracy: 0.7620 - loss: 0.6007 - val_accuracy: 0.4700 - val_loss: 1.1081
Epoch 4/15
10/10 - 1s - 59ms/step - accuracy: 0.8960 - loss: 0.3263 - val_accuracy: 0.4800 - val_loss: 1.1485
Epoch 5/15
10/10 - 1s - 58ms/step - accuracy: 0.9740 - loss: 0.1611 - val_accuracy: 0.4400 - val_loss: 1.3077
Epoch 6/15
10/10 - 1s - 57ms/step - accuracy: 0.9980 - loss: 0.0651 - val_accuracy: 0.4900 - val_loss: 1.4529
Epoch 7/15
10/10 - 1s - 57ms/step - accuracy: 1.0000 - loss: 0.0370 - val_accuracy: 0.4500 - val_loss: 1.5269
Epoch 8/15
10/10 - 1s - 56ms/step - accuracy: 1.0000 - loss: 0.0257 - val_accuracy: 0.4900 - val_loss: 1.5372
Epoch 9/15
10/10 - 1s - 57ms/step - accuracy: 1.0000 - loss: 0.0187 -



Training Keras model: NumLayers_2_Units_16_Dir_Bi
Epoch 1/15
10/10 - 4s - 377ms/step - accuracy: 0.3660 - loss: 1.1640 - val_accuracy: 0.3600 - val_loss: 1.0993
Epoch 2/15
10/10 - 0s - 48ms/step - accuracy: 0.4980 - loss: 0.9814 - val_accuracy: 0.3200 - val_loss: 1.1338
Epoch 3/15
10/10 - 0s - 46ms/step - accuracy: 0.7320 - loss: 0.7554 - val_accuracy: 0.3800 - val_loss: 1.1128
Epoch 4/15
10/10 - 0s - 46ms/step - accuracy: 0.8540 - loss: 0.5525 - val_accuracy: 0.3700 - val_loss: 1.1009
Epoch 5/15
10/10 - 0s - 46ms/step - accuracy: 0.9400 - loss: 0.4107 - val_accuracy: 0.4100 - val_loss: 1.1133
Epoch 6/15
10/10 - 0s - 48ms/step - accuracy: 0.9700 - loss: 0.2916 - val_accuracy: 0.3700 - val_loss: 1.1072
Epoch 7/15
10/10 - 0s - 48ms/step - accuracy: 0.9880 - loss: 0.2127 - val_accuracy: 0.4000 - val_loss: 1.1557
Epoch 8/15
10/10 - 0s - 48ms/step - accuracy: 0.9840 - loss: 0.1690 - val_accuracy: 0.3700 - val_loss: 1.2049
Epoch 9/15
10/10 - 0s - 47ms/step - accuracy: 0.9940 - loss: 0.1249 -



Training Keras model: NumLayers_2_Units_32_Dir_Bi
Epoch 1/15
10/10 - 4s - 445ms/step - accuracy: 0.4240 - loss: 1.1267 - val_accuracy: 0.4700 - val_loss: 1.0620
Epoch 2/15
10/10 - 1s - 57ms/step - accuracy: 0.6620 - loss: 0.8228 - val_accuracy: 0.5400 - val_loss: 1.0155
Epoch 3/15
10/10 - 1s - 55ms/step - accuracy: 0.8260 - loss: 0.5568 - val_accuracy: 0.5500 - val_loss: 0.9824
Epoch 4/15
10/10 - 1s - 53ms/step - accuracy: 0.9440 - loss: 0.3210 - val_accuracy: 0.5200 - val_loss: 0.9883
Epoch 5/15
10/10 - 1s - 53ms/step - accuracy: 0.9880 - loss: 0.1582 - val_accuracy: 0.4700 - val_loss: 1.1114
Epoch 6/15
10/10 - 1s - 54ms/step - accuracy: 0.9960 - loss: 0.0907 - val_accuracy: 0.5200 - val_loss: 1.0909
Epoch 7/15
10/10 - 1s - 53ms/step - accuracy: 1.0000 - loss: 0.0461 - val_accuracy: 0.5000 - val_loss: 1.1401
Epoch 8/15
10/10 - 1s - 53ms/step - accuracy: 1.0000 - loss: 0.0275 - val_accuracy: 0.5100 - val_loss: 1.1391
Epoch 9/15
10/10 - 1s - 53ms/step - accuracy: 1.0000 - loss: 0.0195 -



Training Keras model: NumLayers_2_Units_64_Dir_Bi
Epoch 1/15
10/10 - 4s - 429ms/step - accuracy: 0.3700 - loss: 1.1795 - val_accuracy: 0.4400 - val_loss: 1.0763
Epoch 2/15
10/10 - 1s - 59ms/step - accuracy: 0.6520 - loss: 0.8520 - val_accuracy: 0.5300 - val_loss: 1.0252
Epoch 3/15
10/10 - 1s - 60ms/step - accuracy: 0.8200 - loss: 0.5289 - val_accuracy: 0.4700 - val_loss: 1.1108
Epoch 4/15
10/10 - 1s - 59ms/step - accuracy: 0.9580 - loss: 0.2281 - val_accuracy: 0.5000 - val_loss: 1.1712
Epoch 5/15
10/10 - 1s - 62ms/step - accuracy: 0.9980 - loss: 0.0719 - val_accuracy: 0.5000 - val_loss: 1.2601
Epoch 6/15
10/10 - 1s - 64ms/step - accuracy: 1.0000 - loss: 0.0260 - val_accuracy: 0.4500 - val_loss: 1.3505
Epoch 7/15
10/10 - 1s - 60ms/step - accuracy: 1.0000 - loss: 0.0145 - val_accuracy: 0.4600 - val_loss: 1.4066
Epoch 8/15
10/10 - 1s - 59ms/step - accuracy: 1.0000 - loss: 0.0083 - val_accuracy: 0.4700 - val_loss: 1.3917
Epoch 9/15
10/10 - 1s - 59ms/step - accuracy: 1.0000 - loss: 0.0065 -



Training Keras model: NumLayers_2_Units_32_Dir_Uni
Epoch 1/15
10/10 - 3s - 250ms/step - accuracy: 0.3820 - loss: 1.1230 - val_accuracy: 0.5400 - val_loss: 1.0131
Epoch 2/15
10/10 - 0s - 43ms/step - accuracy: 0.5380 - loss: 0.9188 - val_accuracy: 0.4600 - val_loss: 1.0058
Epoch 3/15
10/10 - 0s - 40ms/step - accuracy: 0.6860 - loss: 0.7615 - val_accuracy: 0.5600 - val_loss: 0.9142
Epoch 4/15
10/10 - 0s - 40ms/step - accuracy: 0.8400 - loss: 0.5420 - val_accuracy: 0.5200 - val_loss: 0.9118
Epoch 5/15
10/10 - 0s - 40ms/step - accuracy: 0.9440 - loss: 0.3467 - val_accuracy: 0.5100 - val_loss: 0.9560
Epoch 6/15
10/10 - 0s - 39ms/step - accuracy: 0.9880 - loss: 0.2162 - val_accuracy: 0.5800 - val_loss: 0.9791
Epoch 7/15
10/10 - 0s - 41ms/step - accuracy: 0.9980 - loss: 0.1278 - val_accuracy: 0.5200 - val_loss: 1.1426
Epoch 8/15
10/10 - 0s - 45ms/step - accuracy: 0.9980 - loss: 0.1051 - val_accuracy: 0.5700 - val_loss: 1.1206
Epoch 9/15
10/10 - 0s - 45ms/step - accuracy: 0.9980 - loss: 0.0749 



Training Keras model: NumLayers_2_Units_32_Dir_Bi
Epoch 1/15
10/10 - 4s - 383ms/step - accuracy: 0.3640 - loss: 1.1672 - val_accuracy: 0.3900 - val_loss: 1.0854
Epoch 2/15
10/10 - 1s - 51ms/step - accuracy: 0.6320 - loss: 0.8259 - val_accuracy: 0.4500 - val_loss: 1.0524
Epoch 3/15
10/10 - 0s - 47ms/step - accuracy: 0.8300 - loss: 0.5325 - val_accuracy: 0.5200 - val_loss: 0.9631
Epoch 4/15
10/10 - 0s - 49ms/step - accuracy: 0.9400 - loss: 0.2844 - val_accuracy: 0.4600 - val_loss: 1.1019
Epoch 5/15
10/10 - 0s - 47ms/step - accuracy: 0.9940 - loss: 0.1093 - val_accuracy: 0.5000 - val_loss: 1.0588
Epoch 6/15
10/10 - 0s - 46ms/step - accuracy: 0.9940 - loss: 0.0551 - val_accuracy: 0.5000 - val_loss: 1.1008
Epoch 7/15
10/10 - 0s - 46ms/step - accuracy: 1.0000 - loss: 0.0309 - val_accuracy: 0.4800 - val_loss: 1.1901
Epoch 8/15
10/10 - 0s - 46ms/step - accuracy: 1.0000 - loss: 0.0178 - val_accuracy: 0.4700 - val_loss: 1.1847
Epoch 9/15
10/10 - 0s - 47ms/step - accuracy: 1.0000 - loss: 0.0126 -

In [7]:
# --- Print Hasil ---
print("\n\n--- Experiment Results Summary ---")
print("Name | RNN Layers | RNN Units | Bidirectional | F1 Keras | F1 Custom")
print("-" * 80)
for res in results_summary:
    f1_custom_str = f"{res['f1_custom']:.4f}" if res['f1_custom'] is not None else "N/A"
    print(f"{res['name']} | {res['num_rnn_layers']} | {res['rnn_units']} | {res['bidirectional']} | {res['f1_keras']:.4f} | {f1_custom_str}")



--- Experiment Results Summary ---
Name | RNN Layers | RNN Units | Bidirectional | F1 Keras | F1 Custom
--------------------------------------------------------------------------------
NumLayers_1_Units_32_Dir_Bi | 1 | 32 | True | 0.4213 | 0.4213
NumLayers_2_Units_32_Dir_Bi | 2 | 32 | True | 0.4696 | 0.4696
NumLayers_3_Units_32_Dir_Bi | 3 | 32 | True | 0.4466 | 0.4466
NumLayers_2_Units_16_Dir_Bi | 2 | 16 | True | 0.3829 | 0.3829
NumLayers_2_Units_32_Dir_Bi | 2 | 32 | True | 0.5267 | 0.5267
NumLayers_2_Units_64_Dir_Bi | 2 | 64 | True | 0.3929 | 0.3929
NumLayers_2_Units_32_Dir_Uni | 2 | 32 | False | 0.4883 | 0.4883
NumLayers_2_Units_32_Dir_Bi | 2 | 32 | True | 0.4646 | 0.4646
