In [20]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import torch

# Load the raw price CSV. The file is read with a three-row (multi-level)
# header; only the top header level is kept as flat column names, and rows
# containing any missing value are dropped.
df = pd.read_csv('../data/BBRI_raw.csv', header=[0, 1, 2], index_col=0, parse_dates=True)
df.columns = df.columns.get_level_values(0)
df = df.dropna()

# Keep only the Close column as a 2-D NumPy array.
# Selecting by label (instead of DataFrame.filter) raises KeyError right here
# when the column is missing, rather than silently yielding a zero-column
# array that only fails later inside the scaler.
data = df[['Close']].to_numpy()
print(f"Total baris data: {len(data)}")

Total baris data: 1707


In [21]:
# Rescale the values into the [0, 1] range (the original note gives gradient
# stability during training as the motivation).
# NOTE(review): the scaler is fit on every row of `data`. If a train/test
# split is made further down, the min/max of the test period leak into the
# training inputs — confirm, and fit on the training rows only if so.
scaler = MinMaxScaler(feature_range=(0, 1)).fit(data)
scaled_data = scaler.transform(data)

print(f"Sampel data awal: {data[0]} -> {scaled_data[0]}")

Sampel data awal: [2286.93432617] -> [0.20219136]


In [22]:
window_size = 60

# Build supervised samples from the scaled series: each row of X holds
# `window_size` consecutive values, and the matching entry of y is the value
# that immediately follows that window.
series = scaled_data[:, 0]
n_samples = max(len(series) - window_size, 0)

if n_samples:
    # Every length-`window_size` window except the last one, which has no
    # following value to predict. copy() turns the read-only strided view
    # into an ordinary writable array.
    X = np.lib.stride_tricks.sliding_window_view(series, window_size)[:n_samples].copy()
else:
    # Too few rows for even one sample: keep the 2-D (0, window_size) layout
    # so the window axis does not disappear from the empty result.
    X = np.empty((0, window_size), dtype=series.dtype)
y = series[window_size:].copy()

print(f"Shape X: {X.shape} | Shape y: {y.shape}")

Shape X: (1647, 60) | Shape y: (1647,)


In [23]:
# Convert the NumPy arrays to tensors and append a trailing axis of size 1.
# Intended input layout: [batch_size, sequence_length, num_features]
# NOTE(review): that layout matches an LSTM created with batch_first=True —
# confirm against the model definition.
def _as_column_tensor(array):
    """Wrap a NumPy array as a tensor, cast it to `torch.Tensor`'s type, and add a last axis."""
    tensor = torch.from_numpy(array).type(torch.Tensor)
    return tensor.unsqueeze(-1)


X_tensor = _as_column_tensor(X)
y_tensor = _as_column_tensor(y)

print(f"Final Tensor Shape - X: {X_tensor.shape}") # [n, 60, 1]
print(f"Final Tensor Shape - y: {y_tensor.shape}") # [n, 1]

Final Tensor Shape - X: torch.Size([1647, 60, 1])
Final Tensor Shape - y: torch.Size([1647, 1])
