In [1]:
import pandas as pd
import numpy as np
import itertools
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dropout, Dense, Bidirectional
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

## Read Data

In [2]:
df = pd.read_csv('Dataset/BKRKF.csv', delimiter=';')
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,04/01/2021,0.30,0.32,0.30,0.32,0.267033,9600
1,05/01/2021,0.31,0.31,0.31,0.31,0.258688,1820300
2,06/01/2021,0.31,0.31,0.31,0.31,0.258688,0
3,07/01/2021,0.30,0.32,0.29,0.32,0.267033,3346700
4,08/01/2021,0.30,0.30,0.28,0.28,0.233654,1000
...,...,...,...,...,...,...,...
907,13/08/2024,0.30,0.30,0.30,0.30,0.300000,0
908,14/08/2024,0.28,0.33,0.28,0.33,0.330000,89700
909,15/08/2024,0.31,0.31,0.31,0.31,0.310000,7000
910,16/08/2024,0.27,0.27,0.27,0.27,0.270000,476300


In [3]:
# Ubah kolom Date menjadi datetime
df['Date'] = pd.to_datetime(df['Date'], format='%d/%m/%Y')
# # Memisahkan Tahun, Bulan, dan Hari
# df['Year']  = df['Date'].dt.year
# df['Month'] = df['Date'].dt.month
# df['Day']  = df['Date'].dt.day
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-01-04,0.30,0.32,0.30,0.32,0.267033,9600
1,2021-01-05,0.31,0.31,0.31,0.31,0.258688,1820300
2,2021-01-06,0.31,0.31,0.31,0.31,0.258688,0
3,2021-01-07,0.30,0.32,0.29,0.32,0.267033,3346700
4,2021-01-08,0.30,0.30,0.28,0.28,0.233654,1000
...,...,...,...,...,...,...,...
907,2024-08-13,0.30,0.30,0.30,0.30,0.300000,0
908,2024-08-14,0.28,0.33,0.28,0.33,0.330000,89700
909,2024-08-15,0.31,0.31,0.31,0.31,0.310000,7000
910,2024-08-16,0.27,0.27,0.27,0.27,0.270000,476300


In [4]:
df_select = df[['Open', 'Close']]
# Menghitung rata-rata
df_select['average'] = df_select.mean(axis=1)
df_select

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_select['average'] = df_select.mean(axis=1)


Unnamed: 0,Open,Close,average
0,0.30,0.32,0.310
1,0.31,0.31,0.310
2,0.31,0.31,0.310
3,0.30,0.32,0.310
4,0.30,0.28,0.290
...,...,...,...
907,0.30,0.30,0.300
908,0.28,0.33,0.305
909,0.31,0.31,0.310
910,0.27,0.27,0.270


## Normalization

In [6]:
# --------------- Z-Score Normalization ---------------
# Inisialisasi StandardScaler
scaler = StandardScaler()

# Fit dan transform data
df_scaled = scaler.fit_transform(df_select['average'].values.reshape(-1, 1))
# Mengubah hasil kembali ke DataFrame
df_scaled = pd.DataFrame({'average': df_scaled.flatten()})
df_scaled

Unnamed: 0,average
0,0.028221
1,0.028221
2,0.028221
3,0.028221
4,-0.556728
...,...
907,-0.264253
908,-0.118016
909,0.028221
910,-1.141677


## Split Data

In [7]:
# Membuat fungsi untuk membagi data menjadi training dan testing
def make_train_test_splits(X, y, split_size):
    X_train = X[:split_size]
    y_train = y[:split_size]
    X_test  = X[split_size:]
    y_test  = y[split_size:]
    return X_train, X_test, y_train, y_test

In [8]:
# --------------- Split Data --------------
X = df_scaled['average']
y = df_scaled['average']

# Membagi data menjadi 80% training dan 20% testing
test_split = 0.2
split_size = int(len(df) * (1-test_split))

X_train, X_test, y_train, y_test = make_train_test_splits(X, y, split_size)

print('Jumlah data training :', len(X_train))
print('Jumlah data testing  :', len(X_test))

Jumlah data training : 729
Jumlah data testing  : 183


## Hyperparameter tuning LSTM

In [9]:
def mape(y_test, pred):
    y_test, pred = np.array(y_test), np.array(pred)
    mape = np.mean(np.abs((y_test - pred) / y_test))
    return round(mape*100, 2)

In [10]:
np.random.seed(123)
tf.random.set_seed(123)
tf.keras.utils.set_random_seed(123)

list_unit      = [50, 100]
list_dropout   = [0.3, 0.5]
list_optimizer = ['adam', 'rmsprop', 'sgd']
list_lr        = [0.01, 0.001, 0.0001]

results_lstm = []

# Grid search over hyperparameters
for unit, dropout, opt, lr in itertools.product(list_unit, list_dropout, list_optimizer, list_lr):
    # --------------- LSTM ---------------
    LSTMmodel = Sequential()
    LSTMmodel.add(LSTM(units=unit, return_sequences=True, input_shape=(1, 1)))
    LSTMmodel.add(Dropout(dropout))
    LSTMmodel.add(LSTM(units=unit, return_sequences=True))
    LSTMmodel.add(Dropout(dropout))
    LSTMmodel.add(LSTM(units=unit))
    LSTMmodel.add(Dropout(dropout))
    LSTMmodel.add(Dense(units=1))
    
    # Compile the model
    if opt == 'adam':
        LSTMmodel.compile(optimizer=Adam(learning_rate=lr), loss='mean_squared_error')
    elif opt == 'rmsprop':
        LSTMmodel.compile(optimizer=RMSprop(learning_rate=lr), loss='mean_squared_error')
    elif opt == 'sgd':
        LSTMmodel.compile(optimizer=SGD(learning_rate=lr), loss='mean_squared_error')
        
    BATCH_SIZE = 32

    callbacks_lstm = [ModelCheckpoint('best_lstm.h5', monitor='val_loss', verbose=1, save_best_only=True),
                        EarlyStopping(monitor='val_loss', patience=10, verbose=1)]
    
    history_lstm = LSTMmodel.fit(X_train, 
                                y_train, 
                                epochs=100,
                                batch_size=BATCH_SIZE,
                                callbacks=callbacks_lstm,
                                validation_data=(X_test, y_test))
    n_model_lstm = load_model('best_lstm.h5')
    # Prediksi
    y_pred_lstm = n_model_lstm.predict(X_test)

    _mape_lstm = mape(y_test.values.reshape(1, -1), y_pred_lstm.reshape(1, -1))
    _mae_lstm  = round(mean_squared_error(y_test.values.reshape(1, -1), y_pred_lstm.reshape(1, -1)), 5)

    print(f'LSTM unit: {unit}, Dropout{dropout}, Learning rate: {lr}, Optimizer: {opt}, Mape: {_mape_lstm}%, MAE: {_mae_lstm}')
    print('----------------------------------------------------------------------------------------------------')

    # Store the result
    results_lstm.append((unit, dropout, lr, opt, _mape_lstm, _mae_lstm))

Epoch 1/100
Epoch 1: val_loss improved from inf to 0.08318, saving model to best_lstm.h5
Epoch 2/100
Epoch 2: val_loss improved from 0.08318 to 0.04626, saving model to best_lstm.h5
Epoch 3/100
Epoch 3: val_loss improved from 0.04626 to 0.01778, saving model to best_lstm.h5
Epoch 4/100
Epoch 4: val_loss did not improve from 0.01778
Epoch 5/100
Epoch 5: val_loss did not improve from 0.01778
Epoch 6/100
Epoch 6: val_loss improved from 0.01778 to 0.01320, saving model to best_lstm.h5
Epoch 7/100
Epoch 7: val_loss did not improve from 0.01320
Epoch 8/100
Epoch 8: val_loss did not improve from 0.01320
Epoch 9/100
Epoch 9: val_loss improved from 0.01320 to 0.01229, saving model to best_lstm.h5
Epoch 10/100
Epoch 10: val_loss did not improve from 0.01229
Epoch 11/100
Epoch 11: val_loss did not improve from 0.01229
Epoch 12/100
Epoch 12: val_loss did not improve from 0.01229
Epoch 13/100
Epoch 13: val_loss did not improve from 0.01229
Epoch 14/100
Epoch 14: val_loss did not improve from 0.0122

In [11]:
df_results_LSTM = pd.DataFrame(results_lstm, columns=['units', 'dropout', 'learning_rate', 'optimizer', 'MAPE(%)', 'MAE'])
df_results_LSTM

Unnamed: 0,units,dropout,learning_rate,optimizer,MAPE(%),MAE
0,50,0.3,0.01,adam,12.85,0.00648
1,50,0.3,0.001,adam,4.97,0.00626
2,50,0.3,0.0001,adam,6.19,0.01467
3,50,0.3,0.01,rmsprop,13.42,0.01184
4,50,0.3,0.001,rmsprop,7.31,0.01586
5,50,0.3,0.0001,rmsprop,10.3,0.02079
6,50,0.3,0.01,sgd,118.61,1.77769
7,50,0.3,0.001,sgd,102.35,1.72752
8,50,0.3,0.0001,sgd,100.27,1.72121
9,50,0.5,0.01,adam,6.68,0.01318


In [12]:
# Mengurutkan berdasarkan mape dan mae, lalu mengambil 1 hasil terbaik
best_result_lstm = df_results_LSTM.sort_values(by=['MAPE(%)', 'MAE']).head(1)

print("Best parameter LSTM berdasarkan MAPE dan MAE terkecil:")
print(best_result_lstm)

Best parameter LSTM berdasarkan MAPE dan MAE terkecil:
    units  dropout  learning_rate optimizer  MAPE(%)      MAE
18    100      0.3           0.01      adam     3.13  0.00092


## Hyperparameter tuning BiLSTM

In [13]:
np.random.seed(123)
tf.random.set_seed(123)
tf.keras.utils.set_random_seed(123)

list_unit      = [50, 100]
list_dropout   = [0.3, 0.5]
list_optimizer = ['adam', 'rmsprop', 'sgd']
list_lr        = [0.01, 0.001, 0.0001]

results_bilstm = []

# Grid search over hyperparameters
for unit, dropout, opt, lr in itertools.product(list_unit, list_dropout, list_optimizer, list_lr):
    # --------------- BiLSTM ---------------
    BiLSTMmodel = Sequential()
    BiLSTMmodel.add(Bidirectional(LSTM(units=unit, return_sequences=True), input_shape=(1, 1)))
    BiLSTMmodel.add(Dropout(dropout))
    BiLSTMmodel.add(Bidirectional(LSTM(units=unit, return_sequences=True)))
    BiLSTMmodel.add(Dropout(dropout))
    BiLSTMmodel.add(Bidirectional(LSTM(units=unit)))
    BiLSTMmodel.add(Dropout(dropout))
    BiLSTMmodel.add(Dense(units=1))
    
    # Compile the model
    if opt == 'adam':
        BiLSTMmodel.compile(optimizer=Adam(learning_rate=lr), loss='mean_squared_error')
    elif opt == 'rmsprop':
        BiLSTMmodel.compile(optimizer=RMSprop(learning_rate=lr), loss='mean_squared_error')
    elif opt == 'sgd':
        BiLSTMmodel.compile(optimizer=SGD(learning_rate=lr), loss='mean_squared_error')
        
    BATCH_SIZE = 32

    callbacks_bilstm = [ModelCheckpoint('best_bilstm.h5', monitor='val_loss', verbose=1, save_best_only=True),
                        EarlyStopping(monitor='val_loss', patience=10, verbose=1)]
    
    history_bilstm = BiLSTMmodel.fit(X_train, 
                                y_train, 
                                epochs=100,
                                batch_size=BATCH_SIZE,
                                callbacks=callbacks_bilstm,
                                validation_data=(X_test, y_test))
    n_model_bilstm = load_model('best_bilstm.h5')
    # Prediksi
    y_pred_bilstm = n_model_bilstm.predict(X_test)

    _mape_bilstm = mape(scaler.inverse_transform(y_test.values.reshape(1, -1)), scaler.inverse_transform(y_pred_bilstm.reshape(1, -1)))
    _mae_bilstm  = round(mean_squared_error(scaler.inverse_transform(y_test.values.reshape(1, -1)), scaler.inverse_transform(y_pred_bilstm.reshape(1, -1))), 5)

    print(f'BiLSTM unit: {unit}, Dropout{dropout}, Learning rate: {lr}, Optimizer: {opt}, Mape: {_mape_bilstm}%, MAE: {_mae_bilstm}')
    print('----------------------------------------------------------------------------------------------------')

    # Store the result
    results_bilstm.append((unit, dropout, lr, opt, _mape_bilstm, _mae_bilstm))

Epoch 1/100
Epoch 1: val_loss improved from inf to 0.02856, saving model to best_bilstm.h5
Epoch 2/100
Epoch 2: val_loss improved from 0.02856 to 0.01818, saving model to best_bilstm.h5
Epoch 3/100
Epoch 3: val_loss did not improve from 0.01818
Epoch 4/100
Epoch 4: val_loss did not improve from 0.01818
Epoch 5/100
Epoch 5: val_loss improved from 0.01818 to 0.01216, saving model to best_bilstm.h5
Epoch 6/100
Epoch 6: val_loss improved from 0.01216 to 0.01193, saving model to best_bilstm.h5
Epoch 7/100
Epoch 7: val_loss improved from 0.01193 to 0.00499, saving model to best_bilstm.h5
Epoch 8/100
Epoch 8: val_loss did not improve from 0.00499
Epoch 9/100
Epoch 9: val_loss did not improve from 0.00499
Epoch 10/100
Epoch 10: val_loss did not improve from 0.00499
Epoch 11/100
Epoch 11: val_loss did not improve from 0.00499
Epoch 12/100
Epoch 12: val_loss did not improve from 0.00499
Epoch 13/100
Epoch 13: val_loss improved from 0.00499 to 0.00459, saving model to best_bilstm.h5
Epoch 14/100


In [14]:
df_results_BiLSTM = pd.DataFrame(results_bilstm, columns=['units', 'dropout', 'learning_rate', 'optimizer', 'MAPE(%)', 'MAE'])
df_results_BiLSTM

Unnamed: 0,units,dropout,learning_rate,optimizer,MAPE(%),MAE
0,50,0.3,0.01,adam,0.5,0.0
1,50,0.3,0.001,adam,0.39,0.0
2,50,0.3,0.0001,adam,0.53,1e-05
3,50,0.3,0.01,rmsprop,1.11,2e-05
4,50,0.3,0.001,rmsprop,0.52,1e-05
5,50,0.3,0.0001,rmsprop,0.61,1e-05
6,50,0.3,0.01,sgd,11.96,0.00209
7,50,0.3,0.001,sgd,11.82,0.00202
8,50,0.3,0.0001,sgd,11.79,0.00202
9,50,0.5,0.01,adam,0.46,1e-05


In [15]:
# Mengurutkan berdasarkan mape dan mae, lalu mengambil 1 hasil terbaik
best_result_bilstm = df_results_BiLSTM.sort_values(by=['MAPE(%)', 'MAE']).head(1)

print("Best parameter BiLSTM berdasarkan MAPE dan MAE terkecil:")
print(best_result_bilstm)

Best parameter BiLSTM berdasarkan MAPE dan MAE terkecil:
    units  dropout  learning_rate optimizer  MAPE(%)  MAE
18    100      0.3           0.01      adam     0.27  0.0
