In [33]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import joblib
import os

In [34]:
COIN = "binancecoin"
df = pd.read_csv(f"../data/processed/{COIN}_processed.csv", parse_dates=["open_time"])
df = df.set_index("open_time")

# Train/Test/Val split

In [35]:
train_ratio = 0.7
val_ratio   = 0.15

In [36]:
n = len(df)
train_end = int(n * train_ratio)
val_end   = train_end + int(n * val_ratio)

df_train = df.iloc[:train_end]
df_val   = df.iloc[train_end:val_end]
df_test  = df.iloc[val_end:]

print("Train:", df_train.shape, "Val:", df_val.shape, "Test:", df_test.shape)

Train: (30680, 12) Val: (6574, 12) Test: (6576, 12)


# Scaling

In [37]:
feature_cols = [
    "open","high","low","close","volume",
    "return_1h","volatility_24h",
    "ma_24","ma_168","ma_ratio",
    "vol_change","missing_flag"
]

In [38]:
feature_scaler = MinMaxScaler()
df_train[feature_cols] = feature_scaler.fit_transform(df_train[feature_cols])
df_val[feature_cols]   = feature_scaler.transform(df_val[feature_cols])
df_test[feature_cols]  = feature_scaler.transform(df_test[feature_cols])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train[feature_cols] = feature_scaler.fit_transform(df_train[feature_cols])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_val[feature_cols]   = feature_scaler.transform(df_val[feature_cols])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test[feature_cols]  = feature_scaler.transform(df_te

In [39]:
price_scaler = MinMaxScaler()
df_train[["close"]] = price_scaler.fit_transform(df_train[["close"]])
df_val[["close"]]   = price_scaler.transform(df_val[["close"]])
df_test[["close"]]  = price_scaler.transform(df_test[["close"]])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train[["close"]] = price_scaler.fit_transform(df_train[["close"]])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_val[["close"]]   = price_scaler.transform(df_val[["close"]])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test[["close"]]  = price_scaler.transform(df_test[["close"]])


In [40]:
SAVE_DIR = f"../data/scaled/{COIN}"
os.makedirs(SAVE_DIR, exist_ok=True)

df_train.to_csv(f"{SAVE_DIR}/{COIN}_train_scaled.csv")
df_val.to_csv(f"{SAVE_DIR}/{COIN}_val_scaled.csv")
df_test.to_csv(f"{SAVE_DIR}/{COIN}_test_scaled.csv")

joblib.dump(feature_scaler, f"{SAVE_DIR}/{COIN}_feature_scaler.pkl")
joblib.dump(price_scaler,   f"{SAVE_DIR}/{COIN}_price_scaler.pkl")

print("\n✓ Scaling + Splitting completed and saved.\n")
print("Saved to:", SAVE_DIR)


✓ Scaling + Splitting completed and saved.

Saved to: ../data/scaled/binancecoin
