In [70]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [71]:
# Get the stock data using yfinance API:
# get 2009-2019 data to train our model
train_data = yf.download('BBCA.JK', start='2010-01-04', end='2019-12-01')

# get 2020 data to present date to test the SVM model
# test_data = yf.download('BBCA.JK', start='2020-01-02', end=datetime.date.today())
test_data = yf.download('BBCA.JK', start='2020-01-04', end='2021-12-01')

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [72]:
train_data.columns = train_data.columns.droplevel(-1)
train_data

Price,Adj Close,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,796.387695,1010.0,1010.0,965.0,1010.0,77812500
2010-01-05,804.272827,1020.0,1020.0,990.0,1020.0,44442500
2010-01-06,780.617615,990.0,1020.0,990.0,990.0,27782500
2010-01-07,784.560242,995.0,995.0,965.0,995.0,38632500
2010-01-08,764.847595,970.0,995.0,970.0,970.0,27427500
...,...,...,...,...,...,...
2019-11-25,5610.080078,6275.0,6335.0,6275.0,6330.0,55133500
2019-11-26,5619.020508,6285.0,6330.0,6270.0,6330.0,192783500
2019-11-27,5632.430176,6300.0,6300.0,6240.0,6270.0,58833500
2019-11-28,5587.729004,6250.0,6295.0,6245.0,6295.0,44248000


In [73]:
test_data.columns = test_data.columns.droplevel(-1)
test_data

Price,Adj Close,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-06,6040.391113,6735.0,6750.0,6690.0,6720.0,27300000
2020-01-07,6044.875977,6740.0,6770.0,6730.0,6735.0,45022500
2020-01-08,5991.063965,6680.0,6745.0,6670.0,6670.0,53692500
2020-01-09,6044.875977,6740.0,6755.0,6730.0,6740.0,40363500
2020-01-10,6031.422363,6725.0,6825.0,6710.0,6765.0,73362000
...,...,...,...,...,...,...
2021-11-24,6959.792969,7475.0,7575.0,7425.0,7575.0,35181400
2021-11-25,6913.239258,7425.0,7525.0,7425.0,7450.0,40216100
2021-11-26,6773.578125,7275.0,7450.0,7250.0,7400.0,113288400
2021-11-29,6889.962891,7400.0,7425.0,7175.0,7275.0,87907900


In [74]:
# sort by date
train_data = train_data.sort_values('Date')
test_data = test_data.sort_values('Date')

# fix the date 
train_data.reset_index(inplace=True)
test_data.reset_index(inplace=True)

In [75]:
train_data

Price,Date,Adj Close,Close,High,Low,Open,Volume
0,2010-01-04,796.387695,1010.0,1010.0,965.0,1010.0,77812500
1,2010-01-05,804.272827,1020.0,1020.0,990.0,1020.0,44442500
2,2010-01-06,780.617615,990.0,1020.0,990.0,990.0,27782500
3,2010-01-07,784.560242,995.0,995.0,965.0,995.0,38632500
4,2010-01-08,764.847595,970.0,995.0,970.0,970.0,27427500
...,...,...,...,...,...,...,...
2466,2019-11-25,5610.080078,6275.0,6335.0,6275.0,6330.0,55133500
2467,2019-11-26,5619.020508,6285.0,6330.0,6270.0,6330.0,192783500
2468,2019-11-27,5632.430176,6300.0,6300.0,6240.0,6270.0,58833500
2469,2019-11-28,5587.729004,6250.0,6295.0,6245.0,6295.0,44248000


In [76]:
test_data

Price,Date,Adj Close,Close,High,Low,Open,Volume
0,2020-01-06,6040.391113,6735.0,6750.0,6690.0,6720.0,27300000
1,2020-01-07,6044.875977,6740.0,6770.0,6730.0,6735.0,45022500
2,2020-01-08,5991.063965,6680.0,6745.0,6670.0,6670.0,53692500
3,2020-01-09,6044.875977,6740.0,6755.0,6730.0,6740.0,40363500
4,2020-01-10,6031.422363,6725.0,6825.0,6710.0,6765.0,73362000
...,...,...,...,...,...,...,...
460,2021-11-24,6959.792969,7475.0,7575.0,7425.0,7575.0,35181400
461,2021-11-25,6913.239258,7425.0,7525.0,7425.0,7450.0,40216100
462,2021-11-26,6773.578125,7275.0,7450.0,7250.0,7400.0,113288400
463,2021-11-29,6889.962891,7400.0,7425.0,7175.0,7275.0,87907900


In [77]:
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data_train = scaler.fit_transform(train_data['Adj Close'].values.reshape(-1, 1))
scaled_data_test = scaler.fit_transform(test_data['Adj Close'].values.reshape(-1, 1))

In [78]:
# Prepare the data for LSTM
def create_dataset(data, time_step=1):
    X, y = [], []
    for i in range(len(data) - time_step - 1):
        a = data[i:(i + time_step), 0]
        X.append(a)
        y.append(data[i + time_step, 0])
    return np.array(X), np.array(y)

# Define time step
time_step = 60

In [79]:
# Create training and test sets
train_size = int(len(scaled_data_train) * 0.8)
test_size = len(scaled_data_train) - train_size
train_data1, test_data1 = scaled_data_train[0:train_size, :], scaled_data_train[train_size:len(scaled_data_train), :]

X_train1, y_train1 = create_dataset(train_data1, time_step)
X_test1, y_test1 = create_dataset(test_data1, time_step)

X_train1 = X_train1.reshape(X_train1.shape[0], X_train1.shape[1], 1)
X_test1 = X_test1.reshape(X_test1.shape[0], X_test1.shape[1], 1)

In [80]:
# Build the LSTM model
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(time_step, 1)))
model.add(Dropout(0.2))
model.add(LSTM(50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(50))
model.add(Dropout(0.2))
model.add(Dense(25))
model.add(Dense(1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

  super().__init__(**kwargs)


In [81]:
# Train the model
model.fit(X_train1, y_train1, batch_size=1, epochs=10)

Epoch 1/10
[1m1915/1915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 41ms/step - loss: 0.0035
Epoch 2/10
[1m1915/1915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 46ms/step - loss: 0.0011
Epoch 3/10
[1m1915/1915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 43ms/step - loss: 9.4645e-04
Epoch 4/10
[1m1915/1915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 18ms/step - loss: 8.9192e-04
Epoch 5/10
[1m1915/1915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 20ms/step - loss: 8.5295e-04
Epoch 6/10
[1m1915/1915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 18ms/step - loss: 8.2052e-04
Epoch 7/10
[1m1915/1915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 28ms/step - loss: 8.1723e-04
Epoch 8/10
[1m1915/1915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 28ms/step - loss: 6.5508e-04
Epoch 9/10
[1m1915/1915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 21ms/step - loss: 6.3386e-04
Epoch 10/10
[1m1915/1915[

<keras.src.callbacks.history.History at 0x22ed6db64b0>

In [82]:
# Predictions
train_predict = model.predict(X_train1)
test_predict = model.predict(X_test1)

[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


In [83]:
# Inverse transform to get actual values
train_predict = scaler.inverse_transform(train_predict)
test_predict = scaler.inverse_transform(test_predict)
y_train1 = scaler.inverse_transform([y_train1])
y_test1 = scaler.inverse_transform([y_test1])

In [84]:
# Calculate RMSE and MAE
train_rmse = np.sqrt(mean_squared_error(y_train1[0], train_predict[:, 0]))
train_mae = mean_absolute_error(y_train1[0], train_predict[:, 0])
test_rmse = np.sqrt(mean_squared_error(y_test1[0], test_predict[:, 0]))
test_mae = mean_absolute_error(y_test1[0], test_predict[:, 0])

print(f'Train RMSE:', train_rmse)
print(f'Train MAE:', train_mae)
print(f'Test RMSE:', test_rmse)
print(f'Test MAE:', test_mae)

Train RMSE: 37.43477016975227
Train MAE: 30.990307858259303
Test RMSE: 195.82614564267902
Test MAE: 170.43922002178925


In [85]:
train_size = int(len(scaled_data_test) * 0.8)
test_size = len(scaled_data_test) - train_size
train_data2, test_data2 = scaled_data_test[0:train_size, :], scaled_data_test[train_size:len(scaled_data_test), :]

X_train2, y_train2 = create_dataset(train_data2, time_step)
X_test2, y_test2 = create_dataset(test_data2, time_step)

X_train2 = X_train2.reshape(X_train2.shape[0], X_train2.shape[1], 1)
X_test2 = X_test2.reshape(X_test2.shape[0], X_test2.shape[1], 1)

In [86]:
model.fit(X_train2, y_train2, batch_size=1, epochs=10)

Epoch 1/10


[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 20ms/step - loss: 0.0047
Epoch 2/10
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 17ms/step - loss: 0.0023
Epoch 3/10
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 18ms/step - loss: 0.0023
Epoch 4/10
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 17ms/step - loss: 0.0021
Epoch 5/10
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 18ms/step - loss: 0.0018
Epoch 6/10
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - loss: 0.0019
Epoch 7/10
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - loss: 0.0017
Epoch 8/10
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 17ms/step - loss: 0.0018
Epoch 9/10
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - loss: 0.0019
Epoch 10/10
[1m311/311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 17ms/step - los

<keras.src.callbacks.history.History at 0x22ed80322d0>

In [87]:
train_predict = model.predict(X_train2)
test_predict = model.predict(X_test2)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step


In [88]:
train_predict = scaler.inverse_transform(train_predict)
test_predict = scaler.inverse_transform(test_predict)
y_train2 = scaler.inverse_transform([y_train2])
y_test2 = scaler.inverse_transform([y_test2])

In [89]:
train_rmse = np.sqrt(mean_squared_error(y_train2[0], train_predict[:, 0]))
train_mae = mean_absolute_error(y_train2[0], train_predict[:, 0])
test_rmse = np.sqrt(mean_squared_error(y_test2[0], test_predict[:, 0]))
test_mae = mean_absolute_error(y_test2[0], test_predict[:, 0])

print(f'Train RMSE:', train_rmse)
print(f'Train MAE:', train_mae)
print(f'Test RMSE:', test_rmse)
print(f'Test MAE:', test_mae)

Train RMSE: 131.8824800786301
Train MAE: 109.35348422427653
Test RMSE: 121.09446521670864
Test MAE: 98.9205932617187
