In [8]:
import pandas as pd
import numpy as np
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import RobustScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense, Masking
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from google.colab import files

In [2]:
df = pd.read_csv("btcdata_clean.csv")

In [3]:
df

Unnamed: 0.1,Unnamed: 0,open,high,low,close,market_cap,next_price
0,0,121870.68,121895.99,121858.06,121895.57,2.428033e+12,122162.01
1,1,121870.68,121895.99,121858.06,121895.57,2.428033e+12,122162.01
2,2,121870.68,121895.99,121858.06,121895.57,2.428033e+12,122162.01
3,3,121870.68,121895.99,121858.06,121895.57,2.428033e+12,122162.01
4,4,121870.68,121895.99,121858.06,121895.57,2.428033e+12,122162.01
...,...,...,...,...,...,...,...
64593,64593,121154.04,121158.95,121131.06,121144.14,2.415036e+12,121256.67
64594,64594,121154.04,121158.95,121131.06,121144.14,2.415036e+12,121256.67
64595,64595,121154.04,121158.95,121131.06,121144.14,2.415036e+12,121256.67
64596,64596,121154.04,121158.95,121131.06,121144.14,2.415036e+12,121256.67


In [6]:
# ============================================
# 🔧 Training Script untuk Prediksi Harga Bitcoin
# ============================================

# ------------------ Custom Scaler ------------------
class TimeSeriesScaler(BaseEstimator, TransformerMixin):
    def __init__(self):
        self.scaler = RobustScaler()

    def fit(self, X, y=None):
        n_samples, n_timesteps, n_features = X.shape
        X_reshaped = X.reshape(-1, n_features)
        self.scaler.fit(X_reshaped)
        return self

    def transform(self, X):
        n_samples, n_timesteps, n_features = X.shape
        X_reshaped = X.reshape(-1, n_features)
        X_scaled = self.scaler.transform(X_reshaped)
        return X_scaled.reshape(n_samples, n_timesteps, n_features)

# ------------------ Sequence Builder ------------------
def create_padded_sequences(X, y, max_len=48):
    Xs, ys = [], []
    for i in range(len(X)):
        seq_x = X[max(0, i - max_len + 1):i + 1]
        pad_len = max_len - len(seq_x)
        if pad_len > 0:
            pad_array = np.zeros((pad_len, X.shape[1]))
            seq_x = np.vstack([pad_array, seq_x])
        Xs.append(seq_x)
        ys.append(y[i])
    return np.array(Xs), np.array(ys)

# ------------------ Build LSTM Model ------------------
def build_lstm_model(input_shape, units=64, dropout_rate=0.2, lr=0.001):
    model = Sequential()
    model.add(Masking(mask_value=0., input_shape=input_shape))
    model.add(LSTM(units, return_sequences=True))
    model.add(Dropout(dropout_rate))
    model.add(LSTM(units))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1))
    model.compile(optimizer=Adam(learning_rate=lr), loss='mse')
    return model

# ------------------ Load Data ------------------
# Pastikan btcdata sudah tersedia sebagai DataFrame
# dengan kolom 'next_price' sebagai target
features = df.drop(columns=['next_price'])
target = df['next_price']

MAX_SEQ_LEN = 48
X_seq, y_seq = create_padded_sequences(features.values, target.values, max_len=MAX_SEQ_LEN)

# ------------------ Train-Test Split ------------------
X_train_seq, X_test_seq, y_train_seq, y_test_seq = train_test_split(
    X_seq, y_seq, test_size=0.25, shuffle=False
)

# ------------------ Pipeline untuk Preprocessing ------------------
scaler = TimeSeriesScaler()
pipeline = Pipeline([("scaler", scaler)])

# Fit pipeline dan transform data
X_train_scaled = pipeline.fit_transform(X_train_seq)
X_test_scaled = pipeline.transform(X_test_seq)

# ------------------ Train Model ------------------
model = build_lstm_model(input_shape=(MAX_SEQ_LEN, X_train_scaled.shape[2]),
                         units=64, dropout_rate=0.2, lr=0.001)

early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

model.fit(X_train_scaled, y_train_seq,
          validation_data=(X_test_scaled, y_test_seq),
          epochs=30, batch_size=32, callbacks=[early_stop], verbose=1)

# ------------------ Save Model & Pipeline ------------------
model.save("lstm_model.keras", include_optimizer=False)

with open("scaler_pipeline.pkl", "wb") as f:
    pickle.dump(pipeline, f)

best_params = {
    'units': 64,
    'dropout_rate': 0.2,
    'lr': 0.001,
    'epochs': 30,
    'batch_size': 32
}
with open("lstm_best_params.pkl", "wb") as f:
    pickle.dump(best_params, f)

print("✅ Model, pipeline, dan parameter berhasil disimpan.")

  super().__init__(**kwargs)


Epoch 1/30
[1m1514/1514[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m96s[0m 61ms/step - loss: 14790338560.0000 - val_loss: 14744684544.0000
Epoch 2/30
[1m1514/1514[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 60ms/step - loss: 14764035072.0000 - val_loss: 14721150976.0000
Epoch 3/30
[1m1514/1514[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 61ms/step - loss: 14741053440.0000 - val_loss: 14697716736.0000
Epoch 4/30
[1m1514/1514[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 60ms/step - loss: 14719386624.0000 - val_loss: 14674343936.0000
Epoch 5/30
[1m1514/1514[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 60ms/step - loss: 14696303616.0000 - val_loss: 14650967040.0000
Epoch 6/30
[1m1514/1514[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 59ms/step - loss: 14673235968.0000 - val_loss: 14627631104.0000
Epoch 7/30
[1m1514/1514[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 59ms/step - loss: 14649124864.0000 - val_loss: 14604292

In [9]:
files.download('lstm_model.keras')
files.download('scaler_pipeline.pkl')
files.download('lstm_best_params.pkl')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>