In [121]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [None]:
# Get the stock data using yfinance
# get 2015-2022 data to train our model
train_data = yf.download('BBCA.JK', start='2015-01-01', end='2022-12-31')

# get 2023 data to present date to test the SVM model
test_data = yf.download('BBCA.JK', start='2023-01-01', end='2024-11-01')

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [None]:
#removing one level of column index
train_data.columns = train_data.columns.droplevel(-1)
train_data

Price,Adj Close,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-02,2238.016357,2645.0,2655.0,2630.0,2655.0,29302500
2015-01-05,2233.785400,2640.0,2640.0,2625.0,2630.0,25434500
2015-01-06,2216.863281,2620.0,2640.0,2600.0,2600.0,48681500
2015-01-07,2221.093262,2625.0,2640.0,2610.0,2610.0,40334000
2015-01-08,2195.709717,2595.0,2630.0,2595.0,2625.0,54457000
...,...,...,...,...,...,...
2022-12-26,8137.984863,8575.0,8625.0,8500.0,8500.0,20495900
2022-12-27,8161.709961,8600.0,8650.0,8575.0,8600.0,27193800
2022-12-28,8209.162109,8650.0,8650.0,8550.0,8650.0,40420200
2022-12-29,8137.984863,8575.0,8600.0,8500.0,8525.0,47020900


In [124]:
test_data.columns = test_data.columns.droplevel(-1)
test_data

Price,Adj Close,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-01-02,8114.259766,8550.0,8600.0,8500.0,8575.0,10653900
2023-01-03,8114.259766,8550.0,8600.0,8525.0,8550.0,27399100
2023-01-04,7924.450684,8350.0,8575.0,8350.0,8525.0,90918800
2023-01-05,7829.547363,8250.0,8375.0,8150.0,8350.0,128838500
2023-01-06,7876.999512,8300.0,8325.0,8100.0,8100.0,69286600
...,...,...,...,...,...,...
2024-10-25,10696.649414,10750.0,10750.0,10625.0,10650.0,34847500
2024-10-28,10547.394531,10600.0,10825.0,10600.0,10825.0,39061500
2024-10-29,10447.890625,10500.0,10575.0,10500.0,10525.0,49680100
2024-10-30,10298.634766,10350.0,10400.0,10275.0,10275.0,114484100


In [None]:
# sort by date
train_data = train_data.sort_values('Date')
test_data = test_data.sort_values('Date')

train_data.reset_index(inplace=True)
test_data.reset_index(inplace=True)

In [126]:
train_data

Price,Date,Adj Close,Close,High,Low,Open,Volume
0,2015-01-02,2238.016357,2645.0,2655.0,2630.0,2655.0,29302500
1,2015-01-05,2233.785400,2640.0,2640.0,2625.0,2630.0,25434500
2,2015-01-06,2216.863281,2620.0,2640.0,2600.0,2600.0,48681500
3,2015-01-07,2221.093262,2625.0,2640.0,2610.0,2610.0,40334000
4,2015-01-08,2195.709717,2595.0,2630.0,2595.0,2625.0,54457000
...,...,...,...,...,...,...,...
1994,2022-12-26,8137.984863,8575.0,8625.0,8500.0,8500.0,20495900
1995,2022-12-27,8161.709961,8600.0,8650.0,8575.0,8600.0,27193800
1996,2022-12-28,8209.162109,8650.0,8650.0,8550.0,8650.0,40420200
1997,2022-12-29,8137.984863,8575.0,8600.0,8500.0,8525.0,47020900


In [127]:
test_data

Price,Date,Adj Close,Close,High,Low,Open,Volume
0,2023-01-02,8114.259766,8550.0,8600.0,8500.0,8575.0,10653900
1,2023-01-03,8114.259766,8550.0,8600.0,8525.0,8550.0,27399100
2,2023-01-04,7924.450684,8350.0,8575.0,8350.0,8525.0,90918800
3,2023-01-05,7829.547363,8250.0,8375.0,8150.0,8350.0,128838500
4,2023-01-06,7876.999512,8300.0,8325.0,8100.0,8100.0,69286600
...,...,...,...,...,...,...,...
432,2024-10-25,10696.649414,10750.0,10750.0,10625.0,10650.0,34847500
433,2024-10-28,10547.394531,10600.0,10825.0,10600.0,10825.0,39061500
434,2024-10-29,10447.890625,10500.0,10575.0,10500.0,10525.0,49680100
435,2024-10-30,10298.634766,10350.0,10400.0,10275.0,10275.0,114484100


In [None]:
# getting the data ready for the model
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data_train = scaler.fit_transform(train_data['Adj Close'].values.reshape(-1, 1))
scaled_data_test = scaler.fit_transform(test_data['Adj Close'].values.reshape(-1, 1))

In [None]:
# Processing data for LSTM model
def create_dataset(data, time_step=1):
    X, y = [], []
    for i in range(len(data) - time_step - 1):
        a = data[i:(i + time_step), 0]
        X.append(a)
        y.append(data[i + time_step, 0])
    return np.array(X), np.array(y)

time_step = 60

In [None]:
# Create the data for train and test
train_size = int(len(scaled_data_train) * 0.8)
test_size = len(scaled_data_train) - train_size
train_data1, test_data1 = scaled_data_train[0:train_size, :], scaled_data_train[train_size:len(scaled_data_train), :]

X_train1, y_train1 = create_dataset(train_data1, time_step)
X_test1, y_test1 = create_dataset(test_data1, time_step)

X_train1 = X_train1.reshape(X_train1.shape[0], X_train1.shape[1], 1)
X_test1 = X_test1.reshape(X_test1.shape[0], X_test1.shape[1], 1)

In [None]:
# LSTM model
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(time_step, 1)))
model.add(Dropout(0.2))
model.add(LSTM(50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(50))
model.add(Dropout(0.2))
model.add(Dense(25))
model.add(Dense(1))

model.compile(optimizer='adam', loss='mean_squared_error')

  super().__init__(**kwargs)


In [None]:
#fiting data into the model
model.fit(X_train1, y_train1, batch_size=1, epochs=10)

Epoch 1/10
[1m1538/1538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 20ms/step - loss: 0.0087
Epoch 2/10
[1m1538/1538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 16ms/step - loss: 0.0021
Epoch 3/10
[1m1538/1538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 25ms/step - loss: 0.0013
Epoch 4/10
[1m1538/1538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 19ms/step - loss: 0.0014
Epoch 5/10
[1m1538/1538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 20ms/step - loss: 0.0012
Epoch 6/10
[1m1538/1538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 20ms/step - loss: 0.0012
Epoch 7/10
[1m1538/1538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 19ms/step - loss: 0.0012
Epoch 8/10
[1m1538/1538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 18ms/step - loss: 0.0012
Epoch 9/10
[1m1538/1538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 26ms/step - loss: 0.0011
Epoch 10/10
[1m1538/1538[0m [32m━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x22ed6b7deb0>

In [None]:
#predicting the data
train_predict = model.predict(X_train1)
test_predict = model.predict(X_test1)

[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 65ms/step
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 44ms/step


In [None]:
train_predict = scaler.inverse_transform(train_predict)
test_predict = scaler.inverse_transform(test_predict)
y_train1 = scaler.inverse_transform([y_train1])
y_test1 = scaler.inverse_transform([y_test1])

In [None]:
#calculating the performance of model for train data
train_rmse = np.sqrt(mean_squared_error(y_train1[0], train_predict[:, 0]))
train_mae = mean_absolute_error(y_train1[0], train_predict[:, 0])
train_r2 = r2_score(y_train1[0], train_predict[:, 0])
test_rmse = np.sqrt(mean_squared_error(y_test1[0], test_predict[:, 0]))
test_mae = mean_absolute_error(y_test1[0], test_predict[:, 0])
test_r2 = r2_score(y_test1[0], test_predict[:, 0])

print(f'Train RMSE:', train_rmse)
print(f'Train MAE:', train_mae)
print(f'Train R2:', train_r2)
print(f'Test RMSE:', test_rmse)
print(f'Test MAE:', test_mae)
print(f'Test R2:', test_r2)

Train RMSE: 94.25439039125953
Train MAE: 79.30369511910459
Train R2: 0.9783864582954773
Test RMSE: 274.33287231122944
Test MAE: 262.8416106993445
Test R2: 0.14245765155576395


In [None]:
# performing the same for test data
train_size = int(len(scaled_data_test) * 0.8)
test_size = len(scaled_data_test) - train_size
train_data2, test_data2 = scaled_data_test[0:train_size, :], scaled_data_test[train_size:len(scaled_data_test), :]

X_train2, y_train2 = create_dataset(train_data2, time_step)
X_test2, y_test2 = create_dataset(test_data2, time_step)

X_train2 = X_train2.reshape(X_train2.shape[0], X_train2.shape[1], 1)
X_test2 = X_test2.reshape(X_test2.shape[0], X_test2.shape[1], 1)

In [None]:
# fitting the data into the model
model.fit(X_train2, y_train2, batch_size=1, epochs=10)

Epoch 1/10


[1m288/288[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 48ms/step - loss: 0.0031
Epoch 2/10
[1m288/288[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 47ms/step - loss: 0.0033
Epoch 3/10
[1m288/288[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 48ms/step - loss: 0.0020
Epoch 4/10
[1m288/288[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 47ms/step - loss: 0.0020
Epoch 5/10
[1m288/288[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 47ms/step - loss: 0.0021
Epoch 6/10
[1m288/288[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 47ms/step - loss: 0.0024
Epoch 7/10
[1m288/288[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 47ms/step - loss: 0.0017
Epoch 8/10
[1m288/288[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 46ms/step - loss: 0.0022
Epoch 9/10
[1m288/288[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 47ms/step - loss: 0.0019
Epoch 10/10
[1m288/288[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 47ms/

<keras.src.callbacks.history.History at 0x22ee99344a0>

In [None]:
# predicting the data
train_predict2 = model.predict(X_train2)
test_predict2 = model.predict(X_test2)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 112ms/step


In [139]:
train_predict2 = scaler.inverse_transform(train_predict2)
test_predict2 = scaler.inverse_transform(test_predict2)
y_train2 = scaler.inverse_transform([y_train2])
y_test2 = scaler.inverse_transform([y_test2])

In [None]:
#calculating the performance of model for test data
train_rmse = np.sqrt(mean_squared_error(y_train2[0], train_predict2[:, 0]))
train_mae = mean_absolute_error(y_train2[0], train_predict2[:, 0])
train_r2 = r2_score(y_train2[0], train_predict2[:, 0])
test_rmse = np.sqrt(mean_squared_error(y_test2[0], test_predict2[:, 0]))
test_mae = mean_absolute_error(y_test2[0], test_predict2[:, 0])
test_r2 = r2_score(y_test2[0], test_predict2[:, 0])

print(f'Train RMSE:', train_rmse)
print(f'Train MAE:', train_mae)
print(f'Train R2:', train_r2)
print(f'Test RMSE:', test_rmse)
print(f'Test MAE:', test_mae)
print(f'Test R2:', test_r2)

Train RMSE: 141.4029522789834
Train MAE: 116.50531684027777
Train R2: 0.8848845363085192
Test RMSE: 350.18408094555315
Test MAE: 329.93507667824076
Test R2: -4.525725422008536
