# In [None]:
# Load the dataset (the CSV is expected to already contain the MACD_12_26,
# RSI_28, Volume and sentiment columns used below)
df = pd.read_csv('indoBERT_lexicon_5TIs_Volume.csv')

# Convert 'Date' column to datetime objects if it's not already
df['Date'] = pd.to_datetime(df['Date'])

# Size of the chronological training prefix, determined BEFORE scaling.
# The scalers are fitted on these rows only: fitting them on the whole frame
# would leak the test period's min/max into the training data.
n_train_rows = len(train_test_split(df, test_size=0.2, shuffle=False)[0])
fit_rows = df.iloc[:n_train_rows]

# Separate scaler for 'Close' for inverse transformation later.
# NOTE: test-period values may now fall slightly outside [0, 1]; that is the
# expected consequence of not peeking at the test range.
sc_close = MinMaxScaler()
sc_close.fit(fit_rows[['Close']])
df['Close'] = sc_close.transform(df[['Close']])

# Normalize the remaining model inputs (each column listed exactly once;
# the original list repeated 'IndoBERT Sentiment Score')
scaled_columns = ['IndoBERT Sentiment Score', 'MACD_12_26', 'RSI_28', 'Volume']
sc = MinMaxScaler()
sc.fit(fit_rows[scaled_columns])
df[scaled_columns] = sc.transform(df[scaled_columns])

# Split the (now scaled) data into training and test sets, keeping time order
train, test = train_test_split(df, test_size=0.2, shuffle=False)

# Define a function to create the proper format of the dataset
def create_dataset(X, look_back, forecast_horizon, target_col=1):
    """Slice a 2-D feature matrix into supervised (window, target) pairs.

    The input is edge-padded with ``look_back`` copies of its first row in
    front and ``forecast_horizon - 1`` copies of its last row behind, so
    exactly one sample is produced per input row. Sample ``i`` uses the
    ``look_back`` rows that precede row ``i`` as its window and rows
    ``i .. i + forecast_horizon - 1`` of ``target_col`` as its target.

    Args:
        X: 2-D array-like of shape (n_rows, n_features).
        look_back: Number of rows in each input window.
        forecast_horizon: Number of target steps per sample; must be at
            least 1 (``np.pad`` rejects negative pad widths).
        target_col: Column index of the series to forecast. Defaults to 1,
            the column the original implementation always used.

    Returns:
        Tuple ``(Xs, ys)`` of arrays with shapes
        ``(n_rows, look_back, n_features)`` and ``(n_rows, forecast_horizon)``.
    """
    # Pad both ends with the nearest real row so every input row gets a
    # full-length window and a full-length target.
    padded_X = np.pad(X, ((look_back, forecast_horizon - 1), (0, 0)), mode='edge')
    n_samples = len(padded_X) - look_back - forecast_horizon + 1

    Xs = [padded_X[start:start + look_back] for start in range(n_samples)]
    ys = [
        padded_X[start + look_back:start + look_back + forecast_horizon, target_col]
        for start in range(n_samples)
    ]
    return np.array(Xs), np.array(ys)

# Window configuration: each sample feeds LOOK_BACK rows to the model and
# targets the following FORECAST_HORIZON values
LOOK_BACK, FORECAST_HORIZON = 90, 60

# Model input columns. create_dataset reads its target from column index 1,
# which is 'Close' in this ordering.
features = ['IndoBERT Sentiment Score', 'Close', 'MACD_12_26', 'RSI_28', 'Volume']

# Build the windowed arrays for the training split first, then the test split
(X_train, Y_train), (X_test, Y_test) = (
    create_dataset(split[features].values, LOOK_BACK, FORECAST_HORIZON)
    for split in (train, test)
)

class MyHyperModel(HyperModel):
    """Tunable forecaster: three stacked bidirectional LSTM layers.

    Dropout follows the first two recurrent layers, and a final Dense layer
    emits FORECAST_HORIZON values. Layer widths and dropout rates are drawn
    from the tuner's hyperparameter space.
    """

    def build(self, hp):
        """Assemble and compile one candidate model from ``hp``."""

        def bi_lstm(units_name, **lstm_kwargs):
            # Width searched in 64..512 in steps of 32
            width = hp.Int(units_name, min_value=64, max_value=512, step=32)
            return Bidirectional(LSTM(units=width, **lstm_kwargs))

        def dropout(rate_name):
            # Rate searched in 0.1..0.5 in steps of 0.05
            rate = hp.Float(rate_name, min_value=0.1, max_value=0.5, step=0.05)
            return Dropout(rate=rate)

        # The list is evaluated top to bottom, so hyperparameters are
        # registered in the order units_1, dropout_1, units_2, dropout_2,
        # units_3.
        stack = [
            Input(shape=(LOOK_BACK, len(features))),  # one window of all input features
            bi_lstm('units_1', return_sequences=True),
            dropout('dropout_1'),
            bi_lstm('units_2', return_sequences=True),
            dropout('dropout_2'),
            bi_lstm('units_3'),
            Dense(FORECAST_HORIZON),
        ]

        model = Sequential()
        for layer in stack:
            model.add(layer)

        model.compile(optimizer='adam', loss='mse')
        return model

# TensorBoard logging: every run writes to its own timestamped folder
log_dir = f"logs/fit/{datetime.datetime.now():%Y%m%d-%H%M%S}"
tensorboard_callback = TensorBoard(histogram_freq=1, log_dir=log_dir)

# Random search over MyHyperModel's space, ranked by validation loss
# (max_trials=5, executions_per_trial=2); overwrite=True discards earlier
# results stored for this project
tuner = RandomSearch(
    MyHyperModel(),
    max_trials=5,
    executions_per_trial=2,
    objective='val_loss',
    project_name='5_tis_indobert',
    directory='logs',
    overwrite=True,
)

# Run the search, holding out 10% of the training samples for validation
tuner.search(
    X_train,
    Y_train,
    validation_split=0.1,
    batch_size=64,
    epochs=30,
    callbacks=[tensorboard_callback],
)

# Keep the top-ranked model and report the three best trials
best_model = tuner.get_best_models(num_models=1)[0]

tuner.results_summary(num_trials=3)

# Forecast on the test windows
Y_pred = best_model.predict(X_test)

# In [None]:
# Run the tuned model over both splits (training windows first, then test)
Y_train_pred = best_model.predict(X_train)
Y_test_pred = best_model.predict(X_test)

# Map every scaled 'Close' array back to price units. Each array is reshaped
# to a single column because sc_close was fitted on one feature.
(
    Y_train_inv,
    Y_test_inv,
    Y_pred_inv,
    Y_train_pred_inv,
    Y_test_pred_inv,
) = (
    sc_close.inverse_transform(scaled.reshape(-1, 1))
    for scaled in (Y_train, Y_test, Y_pred, Y_train_pred, Y_test_pred)
)

# Get the dates from your original DataFrame
dates = df['Date'].values

# create_dataset left-pads each split, so it emits exactly one sample per
# input row and the first forecast step of sample i is row i of that split.
# The x-axis therefore starts at the first row of each split; offsetting it
# by LOOK_BACK would shift every curve LOOK_BACK rows to the right and leave
# the test x-range shorter than its y-values.
n_train_samples = len(Y_train)
train_dates = dates[:n_train_samples]
test_dates = dates[n_train_samples:n_train_samples + len(Y_test)]

# Create Plotly figure
fig = go.Figure()

# One line per series. Taking every FORECAST_HORIZON-th value of the
# flattened (samples * horizon, 1) arrays keeps only each sample's first
# forecast step, i.e. one value per date.
for trace_name, trace_color, trace_dates, trace_values in (
    ('Train', 'blue', train_dates, Y_train_inv),
    ('Train Predictions', 'yellow', train_dates, Y_train_pred_inv),
    ('Test', 'red', test_dates, Y_test_inv),
    ('Test Predictions', 'green', test_dates, Y_test_pred_inv),
):
    fig.add_trace(go.Scatter(x=trace_dates,
                             y=trace_values[::FORECAST_HORIZON].flatten(),
                             mode='lines',
                             name=trace_name,
                             line=dict(color=trace_color)))

# Indonesian public-holiday calendar from the `holidays` package
indonesia_holidays = holidays.Indonesia(years=[2024])  # Adjust years as needed

# Walk forward one calendar day at a time from the last date in the dataset,
# keeping days until 60 of them have been collected
current_date = df['Date'].iloc[-1]
future_dates = []

while len(future_dates) < 60:
    current_date = current_date + pd.DateOffset(days=1)

    # Skip weekends (weekday() 5 = Saturday, 6 = Sunday) and holidays
    if current_date.weekday() >= 5 or current_date in indonesia_holidays:
        continue
    future_dates.append(current_date)

# Convert the collected 60 weekday, non-holiday dates with pd.to_datetime
future_dates = pd.to_datetime(future_dates)

# Predict 60 days into the future
future_steps = 60
close_col = features.index('Close')
last_look_back_days = df[features].values[-LOOK_BACK:]
future_predictions = []

# Roll the model forward in chunks of FORECAST_HORIZON. Ceil division makes
# sure enough steps are produced even when FORECAST_HORIZON does not divide
# future_steps evenly; the surplus is trimmed below.
n_chunks = -(-future_steps // FORECAST_HORIZON)
for _ in range(n_chunks):
    prediction = best_model.predict(last_look_back_days.reshape(1, LOOK_BACK, len(features)))
    future_predictions.extend(prediction[0])

    # Build one synthetic input row per predicted step. The other features
    # are unknown for future dates, so they are held at their last observed
    # values and only the 'Close' column receives the forecast. (The earlier
    # repeat(...).reshape(...) reinterpreted a (features, horizon) array
    # row-major, which scrambled time steps across feature columns.)
    new_rows = np.repeat(last_look_back_days[-1:], FORECAST_HORIZON, axis=0)
    new_rows[:, close_col] = prediction[0]

    # Keep the most recent LOOK_BACK rows; this also works when
    # FORECAST_HORIZON exceeds LOOK_BACK.
    last_look_back_days = np.concatenate((last_look_back_days, new_rows), axis=0)[-LOOK_BACK:]

# Exactly one value per future date
future_predictions = future_predictions[:future_steps]

# Inverse transform future predictions
future_predictions_inv = sc_close.inverse_transform(np.array(future_predictions).reshape(-1, 1))

# Overlay the forecast beyond the dataset as a purple line
fig.add_trace(
    go.Scatter(
        name='Future Predictions',
        mode='lines',
        line={'color': 'purple'},
        x=future_dates,
        y=future_predictions_inv.flatten(),
    )
)

# Figure title and axis labels
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Close',
    title='Multivariate BBCA Stock Price Forecasting (Close + MACD + IndoBERT Sentiment Score)',
)

# Render the figure
fig.show()