In [170]:
import pandas as pd
import numpy as np

In [171]:
df = pd.read_csv("data/processed/binancecoin_processed.csv", parse_dates=["open_time"])
df.index = pd.to_datetime(df.open_time)

In [172]:
feature_cols = [
    "open","high","low","close","volume",
    "return_1h","volatility_24h",
    "ma_24","ma_168","ma_ratio",
    "vol_change","missing_flag"
]

In [173]:
data = df[feature_cols].values
close_prices = df["close"].values

In [174]:
SEQ_LEN = 48

def make_dataset(horizon):
    X, y = [], []
    for i in range(SEQ_LEN, len(df) - horizon):
        X.append(data[i-SEQ_LEN : i])
        y.append(close_prices[i + horizon])
    return np.array(X), np.array(y)

In [175]:
datasets = {
    "1h":  make_dataset(1),
    "12h": make_dataset(12),
    "24h": make_dataset(24),
    "48h": make_dataset(48),
}

In [176]:
def split(X, y):
    n = len(X)
    train_end = int(n * 0.7)
    val_end   = int(n * 0.85)
    return (
        X[:train_end], y[:train_end],
        X[train_end:val_end], y[train_end:val_end],
        X[val_end:], y[val_end:]
    )

In [177]:
for name, (X, y) in datasets.items():
    print(f"{name} → X: {X.shape}, y: {y.shape}")

    X_train, y_train, X_val, y_val, X_test, y_test = split(X, y)

    PATH = f"data/sequences/Binancecoin/{name}/"
    import os
    os.makedirs(PATH, exist_ok=True)

    np.save(PATH + "X_train.npy", X_train)
    np.save(PATH + "y_train.npy", y_train)
    np.save(PATH + "X_val.npy",   X_val)
    np.save(PATH + "y_val.npy",   y_val)
    np.save(PATH + "X_test.npy",  X_test)
    np.save(PATH + "y_test.npy",  y_test)

print("✓ All 4 datasets created successfully.")

1h → X: (43781, 48, 12), y: (43781,)
12h → X: (43770, 48, 12), y: (43770,)
24h → X: (43758, 48, 12), y: (43758,)
48h → X: (43734, 48, 12), y: (43734,)
✓ All 4 datasets created successfully.
