## Import necessary libraries

In [None]:
import os
import sys

import numpy as np
import pandas as pd
from sklearn.metrics import f1_score

sys.path.append('../../src')
from rnn.rnn_from_scratch import SimpleRNNModel
from utils.data_preprocessing import TextPreprocessor
from rnn.model_training import create_keras_rnn


In [2]:
# Read the three dataset splits, using the `id` column as the index of each frame.
train, valid, test = (
    pd.read_csv(csv_path, index_col='id')
    for csv_path in (
        '../../datasets/train.csv',
        '../../datasets/valid.csv',
        '../../datasets/test.csv',
    )
)

# Show the first rows of the training split as the cell output.
train.head()

Unnamed: 0_level_0,text,label
id,Unnamed: 1_level_1,Unnamed: 2_level_1
219,Nikmati cicilan 0% hingga 12 bulan untuk pemes...,neutral
209,Kue-kue yang disajikan bikin saya bernostalgia...,positive
436,Ibu pernah bekerja di grab indonesia,neutral
394,Paling suka banget makan siang di sini ayam sa...,positive
592,Pelayanan bus DAMRI sangat baik,positive


In [3]:
# Preview the first rows of the test split.
test.head()

Unnamed: 0_level_0,text,label
id,Unnamed: 1_level_1,Unnamed: 2_level_1
411,"Dekat dengan hotel saya menginap, hanya ditemp...",positive
729,"Iya benar, dia sedang jaga warung.",neutral
373,Kangkungnya lumayan tapi kepiting saus padangn...,negative
262,Bertempat di braga city walk yang satu gedung ...,positive
177,Gianyar terima bantuan sosial 2018 sebesar rp ...,neutral


In [4]:
# Print the index range, column dtypes and non-null counts of the training split.
train.info()

<class 'pandas.core.frame.DataFrame'>
Index: 500 entries, 219 to 719
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    500 non-null    object
 1   label   500 non-null    object
dtypes: object(2)
memory usage: 11.7+ KB


In [5]:
# Print the index range, column dtypes and non-null counts of the test split.
test.info()

<class 'pandas.core.frame.DataFrame'>
Index: 400 entries, 411 to 768
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    400 non-null    object
 1   label   400 non-null    object
dtypes: object(2)
memory usage: 9.4+ KB


## Preprocessing

In [6]:
# Run the project's TextPreprocessor over the three splits.
# NOTE(review): the meaning of `use_vectorizer=False` is defined in
# utils.data_preprocessing, which is not visible from this notebook.
preprocessor = TextPreprocessor(vocab_size=10000, max_length=100)
processed_data = preprocessor.preprocess_dataset(
    train,
    valid,
    test,
    use_vectorizer=False,
)

# Pull the per-split sequences out of the result mapping.
train_seq, val_seq, test_seq = (
    processed_data['train_sequences'],
    processed_data['val_sequences'],
    processed_data['test_sequences'],
)

# Pull the matching per-split labels.
train_labels, val_labels, test_labels = (
    processed_data['train_labels'],
    processed_data['val_labels'],
    processed_data['test_labels'],
)

# Sizes reported by the preprocessor; later cells use them to build the models.
vocab_size, num_classes = (
    processed_data['vocab_size'],
    processed_data['num_classes'],
)

# Quick sanity report of what came back.
print(f"Vocab size: {vocab_size}")
print(f"Num classes: {num_classes}")
print(f"Sequence shape: {train_seq.shape}")
# np.bincount counts how many training samples carry each label id.
print(f"Labels distribution: {np.bincount(train_labels)}")

Vocab size: 2796
Num classes: 3
Sequence shape: (500, 100)
Labels distribution: [192 119 189]


## Modelling

### Keras Models

In [10]:
# Experiment grid: one entry per RNN variant to train.
# Each row is (experiment name, hidden layer sizes, bidirectional flag).
configs = {
    experiment: {'hidden_sizes': list(layer_sizes), 'bidirectional': is_bidirectional}
    for experiment, layer_sizes, is_bidirectional in (
        # Effect of the number of layers (3 variations)
        ('rnn_1layer', (64,), False),
        ('rnn_2layer', (64, 32), False),
        ('rnn_3layer', (64, 32, 16), False),

        # Effect of the number of cells per layer (3 variations)
        ('rnn_cells_32', (32, 32), False),
        ('rnn_cells_64', (64, 64), False),
        ('rnn_cells_128', (128, 128), False),

        # Effect of direction (2 variations)
        ('rnn_unidirectional', (64, 32), False),
        ('rnn_bidirectional', (64, 32), True),
    )
}

In [12]:
# Train every configured Keras RNN variant, score it on the test split with
# macro-F1, save the fitted model to disk and collect the metrics per variant.
#
# NOTE(review): in the recorded run 'rnn_2layer' and 'rnn_unidirectional' use
# identical configs yet reached different F1 scores (0.3919 vs 0.4566), so
# results vary from run to run — consider fixing the random seeds before
# drawing conclusions from differences between variants.
# NOTE(review): variants are ranked by their test-split score; if the test
# split is meant to stay untouched for final reporting, compare on the
# validation split instead.

# Make sure the output directory exists up front, so a fresh checkout does not
# fail at `model.save` only after the first model has already been trained.
MODELS_DIR = '../../models'
os.makedirs(MODELS_DIR, exist_ok=True)

training_results = {}

for name, config in configs.items():
    print(f"\nTraining {name}...")

    # Create and train.
    model = create_keras_rnn(vocab_size, num_classes, config)

    # verbose=2 logs one line per epoch instead of an animated progress bar,
    # which floods (and garbles) saved notebook output.
    history = model.fit(
        train_seq, train_labels,
        validation_data=(val_seq, val_labels),
        epochs=10, batch_size=32, verbose=2
    )

    # Evaluate: argmax over axis 1 picks the highest-scoring class per row
    # (assumes predict returns one row of class scores per sample — confirm
    # against create_keras_rnn).
    y_pred = np.argmax(model.predict(test_seq, verbose=0), axis=1)
    macro_f1 = f1_score(test_labels, y_pred, average='macro')

    # Save the fitted model under the experiment's name.
    model.save(f'{MODELS_DIR}/{name}.h5')

    training_results[name] = {
        'config': config,
        'macro_f1': macro_f1,
        'history': history.history
    }

    print(f"✅ {name} - F1: {macro_f1:.4f}")


Training rnn_1layer...
Epoch 1/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 52ms/step - accuracy: 0.3057 - loss: 1.1197 - val_accuracy: 0.3800 - val_loss: 1.0851
Epoch 2/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - accuracy: 0.5171 - loss: 0.9509 - val_accuracy: 0.4400 - val_loss: 1.1119
Epoch 3/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.7156 - loss: 0.6631 - val_accuracy: 0.4700 - val_loss: 1.2517
Epoch 4/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.7404 - loss: 0.5236 - val_accuracy: 0.4300 - val_loss: 1.3096
Epoch 5/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - accuracy: 0.8568 - loss: 0.3936 - val_accuracy: 0.4200 - val_loss: 1.5014
Epoch 6/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.8765 - loss: 0.3404 - val_accuracy: 0.4400 - val_loss: 1.5486
Epoch 7/10



✅ rnn_1layer - F1: 0.4572

Training rnn_2layer...
Epoch 1/10




[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 77ms/step - accuracy: 0.3457 - loss: 1.1400 - val_accuracy: 0.3400 - val_loss: 1.1056
Epoch 2/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - accuracy: 0.4695 - loss: 1.0355 - val_accuracy: 0.3400 - val_loss: 1.3685
Epoch 3/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - accuracy: 0.6595 - loss: 0.8168 - val_accuracy: 0.3900 - val_loss: 1.2517
Epoch 4/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step - accuracy: 0.7964 - loss: 0.5682 - val_accuracy: 0.3400 - val_loss: 1.4850
Epoch 5/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - accuracy: 0.9515 - loss: 0.2599 - val_accuracy: 0.3500 - val_loss: 1.6159
Epoch 6/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step - accuracy: 0.9930 - loss: 0.1030 - val_accuracy: 0.3500 - val_loss: 1.6734
Epoch 7/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━



✅ rnn_2layer - F1: 0.3919

Training rnn_3layer...
Epoch 1/10




[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 100ms/step - accuracy: 0.3619 - loss: 1.2115 - val_accuracy: 0.3800 - val_loss: 1.1062
Epoch 2/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 53ms/step - accuracy: 0.3424 - loss: 1.2238 - val_accuracy: 0.3600 - val_loss: 1.1410
Epoch 3/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 55ms/step - accuracy: 0.4233 - loss: 1.1575 - val_accuracy: 0.4200 - val_loss: 1.2022
Epoch 4/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 52ms/step - accuracy: 0.5043 - loss: 1.0407 - val_accuracy: 0.3200 - val_loss: 1.2144
Epoch 5/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 51ms/step - accuracy: 0.4654 - loss: 1.0828 - val_accuracy: 0.3500 - val_loss: 1.1866
Epoch 6/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 52ms/step - accuracy: 0.4325 - loss: 1.1205 - val_accuracy: 0.3100 - val_loss: 1.2031
Epoch 7/10
[1m16/16[0m [32m━━━━━━━━━━━━━━



✅ rnn_3layer - F1: 0.2924

Training rnn_cells_32...
Epoch 1/10




[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 76ms/step - accuracy: 0.3729 - loss: 1.1329 - val_accuracy: 0.3900 - val_loss: 1.0772
Epoch 2/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step - accuracy: 0.4138 - loss: 1.0902 - val_accuracy: 0.4200 - val_loss: 1.0958
Epoch 3/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step - accuracy: 0.7605 - loss: 0.6928 - val_accuracy: 0.4200 - val_loss: 1.2733
Epoch 4/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - accuracy: 0.9355 - loss: 0.3416 - val_accuracy: 0.4600 - val_loss: 1.4621
Epoch 5/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - accuracy: 0.9953 - loss: 0.1383 - val_accuracy: 0.4600 - val_loss: 1.5836
Epoch 6/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step - accuracy: 1.0000 - loss: 0.0605 - val_accuracy: 0.4300 - val_loss: 1.5787
Epoch 7/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━



✅ rnn_cells_32 - F1: 0.4461

Training rnn_cells_64...
Epoch 1/10




[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 85ms/step - accuracy: 0.3114 - loss: 1.2338 - val_accuracy: 0.3800 - val_loss: 1.0988
Epoch 2/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step - accuracy: 0.3982 - loss: 1.1132 - val_accuracy: 0.3300 - val_loss: 1.1682
Epoch 3/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 42ms/step - accuracy: 0.6581 - loss: 0.8048 - val_accuracy: 0.3300 - val_loss: 1.4484
Epoch 4/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - accuracy: 0.8913 - loss: 0.3600 - val_accuracy: 0.3500 - val_loss: 1.7302
Epoch 5/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - accuracy: 0.9847 - loss: 0.0950 - val_accuracy: 0.3200 - val_loss: 1.9074
Epoch 6/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - accuracy: 0.9987 - loss: 0.0303 - val_accuracy: 0.3600 - val_loss: 1.9850
Epoch 7/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━



✅ rnn_cells_64 - F1: 0.3886

Training rnn_cells_128...
Epoch 1/10




[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 86ms/step - accuracy: 0.3594 - loss: 1.2023 - val_accuracy: 0.3800 - val_loss: 1.0853
Epoch 2/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 48ms/step - accuracy: 0.3786 - loss: 1.1122 - val_accuracy: 0.3100 - val_loss: 1.4087
Epoch 3/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 50ms/step - accuracy: 0.3870 - loss: 1.1915 - val_accuracy: 0.3500 - val_loss: 1.1589
Epoch 4/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 49ms/step - accuracy: 0.2811 - loss: 1.2393 - val_accuracy: 0.3800 - val_loss: 1.1100
Epoch 5/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 48ms/step - accuracy: 0.3069 - loss: 1.1813 - val_accuracy: 0.3800 - val_loss: 1.0923
Epoch 6/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 50ms/step - accuracy: 0.3368 - loss: 1.1764 - val_accuracy: 0.3800 - val_loss: 1.1442
Epoch 7/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━



✅ rnn_cells_128 - F1: 0.2962

Training rnn_unidirectional...
Epoch 1/10




[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 77ms/step - accuracy: 0.3486 - loss: 1.1728 - val_accuracy: 0.3800 - val_loss: 1.0815
Epoch 2/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 42ms/step - accuracy: 0.3986 - loss: 1.0896 - val_accuracy: 0.3900 - val_loss: 1.1494
Epoch 3/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step - accuracy: 0.6782 - loss: 0.7908 - val_accuracy: 0.4500 - val_loss: 1.2316
Epoch 4/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step - accuracy: 0.8382 - loss: 0.4589 - val_accuracy: 0.4400 - val_loss: 1.4173
Epoch 5/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step - accuracy: 0.9854 - loss: 0.1920 - val_accuracy: 0.3900 - val_loss: 1.4486
Epoch 6/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 43ms/step - accuracy: 0.9949 - loss: 0.0967 - val_accuracy: 0.4300 - val_loss: 1.6021
Epoch 7/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━



✅ rnn_unidirectional - F1: 0.4566

Training rnn_bidirectional...
Epoch 1/10




[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 148ms/step - accuracy: 0.4014 - loss: 1.0650 - val_accuracy: 0.4100 - val_loss: 1.0333
Epoch 2/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 124ms/step - accuracy: 0.5480 - loss: 0.8704 - val_accuracy: 0.5200 - val_loss: 1.0161
Epoch 3/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 126ms/step - accuracy: 0.9039 - loss: 0.4097 - val_accuracy: 0.4600 - val_loss: 1.1697
Epoch 4/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 124ms/step - accuracy: 0.9734 - loss: 0.2439 - val_accuracy: 0.4600 - val_loss: 1.2704
Epoch 5/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 122ms/step - accuracy: 0.9990 - loss: 0.0826 - val_accuracy: 0.5100 - val_loss: 1.2692
Epoch 6/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 127ms/step - accuracy: 1.0000 - loss: 0.0346 - val_accuracy: 0.5200 - val_loss: 1.3394
Epoch 7/10
[1m16/16[0m [32m━━━━━━━━



✅ rnn_bidirectional - F1: 0.4433
