In [None]:
# Import Library Model
import math
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
from tensorflow.keras.layers import GRU, Dense, Dropout

import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
import tensorflow as tf
from sklearn import metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the stock-price table from the "AGRO" sheet of the workbook.
# The first data row is discarded and the index renumbered from 0
# (NOTE(review): presumably that row is a secondary header — confirm).
path = "../Data/STOCK PRICE IDXBUMN DATASET.xlsx"
stock_price = (
    pd.read_excel(path, sheet_name="AGRO")
    .iloc[1:]
    .reset_index(drop=True)
)

# Preview the first five remaining rows
stock_price.head()

In [None]:
# Read the news-sentiment table from the "AGRO" sheet of the scraping workbook.
path = "../Data/CNBC NEWS SCREAPING DATASET.xlsx"
sentimen_stock = pd.read_excel(io=path, sheet_name="AGRO")

# Preview the first five rows
sentimen_stock.head()

### Data Understanding

In [None]:
# Summarise "stock_price": column names, dtypes and non-null counts
stock_price.info()

In [None]:
# Summarise "sentimen_stock": column names, dtypes and non-null counts
sentimen_stock.info()

In [None]:
# Normalise the dtypes of "stock_price".
# 'Date': drop the time-of-day component (via .dt.date) and store it back as datetime64.
stock_price['Date'] = pd.to_datetime(pd.to_datetime(stock_price['Date']).dt.date)

# Price, volatility and volume columns are all cast to int in one call.
# NOTE(review): astype(int) truncates fractions — confirm 'Volatilitas Pasar' is meant to be integral.
integer_columns = ['Open', 'High', 'Low', 'Close', 'Volatilitas Pasar', 'Volume']
stock_price = stock_price.astype(dict.fromkeys(integer_columns, int))

In [None]:
# Normalise the dtypes of "sentimen_stock".
# 'date': drop the time-of-day component (via .dt.date) and store it back as datetime64.
sentimen_stock['date'] = pd.to_datetime(pd.to_datetime(sentimen_stock['date']).dt.date)

# 'compound_score': swap decimal commas for dots in the text form, then parse as float.
score_text = sentimen_stock['compound_score'].astype(str)
sentimen_stock['compound_score'] = score_text.str.replace(',', '.').astype(float)

In [None]:
# Preview the first rows of the sentiment table after the dtype conversion
sentimen_stock.head()

In [None]:
# Preview the last rows of the price table after the dtype conversion
stock_price.tail()

In [None]:
# Collapse the sentiment table to one row per date holding the mean compound score
df_avg = (
    sentimen_stock
    .groupby("date", as_index=False)["compound_score"]
    .mean()
)
print(df_avg)

### Merge Dataset

In [None]:
# Left-join the daily mean sentiment onto the price rows; every price row is kept
# and dates without a sentiment row get NaN in 'compound_score'.
df = stock_price.merge(df_avg, how='left', left_on='Date', right_on='date')
df.head()

In [None]:
# Remove the join key inherited from the sentiment table ('Date' already carries it)
# and switch the remaining columns to lower-case / snake_case names in one pass.
column_names = {
    'compound_score': 'sentimen_score',
    'Volatilitas Pasar': 'volatilitas_pasar',
    'Volume': 'volume',
    'Close': 'close',
    'Low': 'low',
    'High': 'high',
    'Open': 'open',
}
df = df.drop(columns=['date']).rename(columns=column_names)

In [None]:
# Dates without a sentiment row are given a neutral score of 0.
# Assign the result back instead of calling fillna(inplace=True) on df[...]:
# the chained in-place form acts on a temporary Series and, with pandas
# Copy-on-Write enabled, leaves `df` unchanged (NaNs would reach the scaler).
df["sentimen_score"] = df["sentimen_score"].fillna(0.0)

In [None]:
# Preview the merged frame after renaming and filling missing sentiment
df.head()

In [None]:
# Export the complete merged frame (all rows, without the index) to Excel.
# The confirmation message is built from the same path variable so it always
# names the file that was actually written.
output_path = "AGRO_VAL.xlsx"
df.to_excel(output_path, index=False)
print(f"File '{output_path}' has been saved.")

In [None]:
# Ensure 'Date' is datetime so it can be used directly as the x-axis
df['Date'] = pd.to_datetime(df['Date'])

# Columns to plot (everything except 'Date')
columns = ['open', 'high', 'low', 'close', 'volume', 'volatilitas_pasar', 'sentimen_score']

# Derive the grid size from the number of columns so every column gets a panel.
# (A fixed 3x2 grid has only six axes, so the seventh column would not be drawn.)
n_cols = 2
n_rows = math.ceil(len(columns) / n_cols)
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(16, 4 * n_rows))
fig.suptitle('Visualisasi Data Saham', fontsize=18)
axes = axes.flatten()

# One line chart per column
for ax, col in zip(axes, columns):
    ax.plot(df['Date'], df[col], label=col, color='b')
    ax.set_title(f'{col.capitalize()} Over Time', fontsize=14)
    ax.set_xlabel('Date')
    ax.set_ylabel(col.capitalize())
    ax.legend()

# Hide any panel left over when the number of columns is odd
for ax in axes[len(columns):]:
    ax.set_visible(False)

# Leave room for the suptitle and keep the panels from overlapping
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()


### Data Processing

In [None]:
# Keep 'Date' plus the seven model features, in the order the later cells index them
# (column 3 of the feature part is 'close').
selected_columns = ['Date', 'open', 'high', 'low', 'close', 'volume', 'volatilitas_pasar', 'sentimen_score']
df = df[selected_columns]

In [None]:
# Scale the seven model features to the [0, 1] range.
feature_columns = ['open', 'high', 'low', 'close', 'volume', 'volatilitas_pasar', 'sentimen_score']
features = df[feature_columns]

# Fit the min/max on the training rows only, then apply the same transform to
# every row. Fitting on the whole frame would leak the test period's value range
# into training. The 0.8 ratio mirrors the train/test split used further down —
# keep the two in sync. Test-period values may therefore fall outside [0, 1].
n_fit_rows = math.ceil(len(features) * 0.8)
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(features.iloc[:n_fit_rows])
scaled_data = scaler.transform(features)

In [None]:
# Chronological split: the first 80% of rows (rounded up) train the model,
# the remainder is held back for testing.
training_data_length = math.ceil(0.8 * len(scaled_data))
train_data, test_data = (
    scaled_data[:training_data_length],
    scaled_data[training_data_length:],
)

In [None]:
# Build the training windows: each sample is the 60 rows preceding position `end`
# (all features) and its target is column 3 ('close') of the row at `end`.
window_ends = range(60, len(train_data))
x_train = np.array([train_data[end - 60:end, :] for end in window_ends])
y_train = np.array([train_data[end, 3] for end in window_ends])

# Make the (samples, timesteps, features) layout explicit
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 7))
print(f"x_train shape: {x_train.shape}")
print(f"y_train shape: {y_train.shape}")

In [None]:
# Build the test windows the same way: 60 preceding rows per sample.
# Targets are column 3 ('close') from row 60 onward, so the first 60 test rows
# serve only as context and get no prediction.
y_test = test_data[60:, 3]
x_test = np.array([test_data[end - 60:end, :] for end in range(60, len(test_data))])

# Make the (samples, timesteps, features) layout explicit
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 7))
print(f"x_test shape: {x_test.shape}")
print(f"y_test shape: {y_test.shape}")

In [None]:
# Slice the unscaled frame at the same row boundary as the scaled arrays,
# so the plots further down can show real dates and prices.
train, valid = df.iloc[:training_data_length], df.iloc[training_data_length:]

print(f"Total data: {len(df)}")
print(f"Training data: {len(train)}")
print(f"Validation data: {len(valid)}")

In [None]:
# Seed Python, NumPy and the Keras backend so weight initialisation, dropout and
# batch shuffling start from the same state on every run.
# NOTE(review): some GPU kernels may still be non-deterministic.
tf.keras.utils.set_random_seed(42)

# GRU regressor: one 24-unit GRU layer over (60 timesteps, 7 features),
# 20% dropout, and a single linear output for the scaled close price.
model = Sequential([
    GRU(units=24, return_sequences=False, input_shape=(60, 7)),
    Dropout(0.2),
    Dense(units=1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model.
# NOTE(review): validation_data is the test split, so 'val_loss' is measured on the
# same data used for the final metrics; it is not an independent hold-out.
history = model.fit(x_train, y_train, batch_size=32, epochs=250, validation_data=(x_test, y_test))

In [None]:
# Training vs. validation loss per epoch, read from the fit history
loss_fig, loss_ax = plt.subplots()
for history_key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    loss_ax.plot(history.history[history_key], label=curve_label)
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()
plt.show()

In [None]:
predictions = model.predict(x_test)

# Undo the scaling to get predictions in the original price scale.
# MinMaxScaler scales each column with its own min/max, and the target is
# column 3 ('close'). The predictions must therefore sit in column 3 of the
# padded array and be read back from column 3; using column 0 would
# de-normalise them with the 'open' column's range.
close_idx = 3
padded = np.zeros((predictions.shape[0], x_test.shape[2]))
padded[:, close_idx] = predictions[:, 0]
predictions = scaler.inverse_transform(padded)[:, close_idx]

# Align the validation frame with the predictions (the first 60 validation rows
# are only context). Copy it so the new column is not written into a slice of `df`.
valid = valid.iloc[-len(predictions):].copy()
valid['Predictions'] = predictions

In [None]:
plt.figure(figsize=(16, 8))
# Title names the ticker that is actually loaded (sheet "AGRO")
plt.title('Prediksi Harga Saham AGRO', fontsize=20)
plt.xlabel('Date', fontsize=18)
plt.ylabel('Harga Penutupan Saham (Rp)', fontsize=18)

# Actual close price over the whole period
plt.plot(df['Date'], df['close'], label='Data Aktual', color='blue', linewidth=2)

# Predictions, drawn only over the validation period
plt.plot(valid['Date'], valid['Predictions'], label='Prediksi', color='orange')

# Legend
plt.legend(loc='lower right', fontsize=12)

# Tilt the date labels so they do not overlap
plt.xticks(rotation=45)

# Render the figure
plt.tight_layout()
plt.show()

In [None]:
# Side-by-side table of date, actual close and predicted close
result_columns = ['Date', 'close', 'Predictions']
hasil_prediksi = valid.loc[:, result_columns]
print(hasil_prediksi)

In [None]:
# Make sure 'Date' is datetime and the rows are in chronological order.
# assign() returns a new frame, so nothing is written into `valid` in place
# (it may still be a slice of `df`).
valid = valid.assign(Date=pd.to_datetime(valid['Date'])).sort_values(by='Date')

# Actual vs. predicted close price over the validation period
plt.figure(figsize=(16, 8))
plt.title('Perbandingan Data Aktual vs Prediksi Harga Saham', fontsize=20)
plt.xlabel('Date', fontsize=18)
plt.ylabel('Harga Penutupan Saham (Rp)', fontsize=18)

# Actual close price
plt.plot(valid['Date'], valid['close'], label='Data Aktual', color='blue', linewidth=2)

# Predicted close price
plt.plot(valid['Date'], valid['Predictions'], label='Prediksi', color='orange', linestyle='--', linewidth=2)

# Grid, legend and x-axis limited to the validation date range
plt.grid(True)
plt.legend(loc='upper left', fontsize=14)
plt.xticks(rotation=45)
plt.xlim(valid['Date'].min(), valid['Date'].max())

# Render the figure
plt.tight_layout()
plt.show()


In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

# Error metrics in the original price scale, over the validation rows
actual_close = valid['close']
predicted_close = valid['Predictions']

# Root mean squared error
rmse = np.sqrt(mean_squared_error(actual_close, predicted_close))
print(f"RMSE: {rmse:.2f}")

# Mean absolute error
mae = mean_absolute_error(actual_close, predicted_close)
print(f"MAE: {mae:.2f}")

# MAPE with protection against zero targets
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    Args:
        y_true: array-like of actual values.
        y_pred: array-like of predicted values, same shape as ``y_true``.

    Returns:
        ``mean(|y_true - y_pred| / |y_true|) * 100``. Positions where
        ``y_true`` is 0 contribute an error of 0 and are still counted in the mean.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    abs_error = np.abs(y_true - y_pred)
    # Divide only where the target is non-zero; masked positions keep the 0 from
    # `out`. (np.where would evaluate the division everywhere first and emit
    # divide-by-zero / invalid-value warnings.)
    ratio = np.divide(
        abs_error,
        np.abs(y_true),
        out=np.zeros_like(abs_error),
        where=y_true != 0,
    )
    return np.mean(ratio) * 100

# Mean absolute percentage error of predicted vs. actual close
mape = mean_absolute_percentage_error(
    y_true=valid['close'],
    y_pred=valid['Predictions'],
)
print(f"MAPE: {mape:.2f}%")

In [None]:
from tensorflow.keras.models import save_model
import joblib

# Output files are named after the ticker this notebook trains on (AGRO).
# The paths are defined once and reused in the messages, so the printed names
# always match what was written and another ticker's artefacts are not overwritten.
MODEL_PATH = 'GRU_MODEL_AGRO.h5'
SCALER_PATH = 'GRU_SCALER_AGRO.save'

# Save the trained GRU model
save_model(model, MODEL_PATH)

# Save the fitted scaler (needed to normalise inputs and de-normalise outputs)
joblib.dump(scaler, SCALER_PATH)

print("Model dan scaler berhasil disimpan:")
print(f"- {MODEL_PATH}")
print(f"- {SCALER_PATH}")

In [None]:
# # Prediksi dalam skala normalisasi
# y_pred_scaled = model.predict(x_test)

# # Hitung MSE pada skala 0-1
# mse_scaled = mean_squared_error(y_test, y_pred_scaled)
# print(f"MSE (Scaled): {mse_scaled}")

# # Hitung RMSE pada skala 0-1
# rmse_scaled = np.sqrt(mse_scaled)
# print(f"RMSE (Scaled): {rmse_scaled}")

In [None]:
# import seaborn as sns
# import matplotlib.pyplot as plt

# plt.figure(figsize=(10,6))
# sns.heatmap(df.corr(), annot=True, cmap='coolwarm', fmt=".2f")
# plt.title("Korelasi Antar Fitur")
# plt.show()