In [13]:
import numpy as np
import pandas as pd
import inspect

In [14]:
df = pd.read_csv(r"bitcoin_2010-07-29_2025-04-25.csv")

In [15]:
close_price = df[["Close"]].iloc[::-1].reset_index(drop=True)
close_price.tail(10)

Unnamed: 0,Close
5374,83677.65
5375,84051.96
5376,84931.22
5377,84464.64
5378,85127.56
5379,85095.42
5380,87525.71
5381,93375.16
5382,93565.02
5383,93823.23


In [16]:
# Normalize the data
norm_data = np.log(df["Close"] / df["Close"].shift(1)).dropna()

In [17]:
# Split training and testing data
train_size = int(len(norm_data) * 0.8)
train_data_bitcoin = norm_data[:train_size]
test_data_bitcoin = norm_data[train_size:]

In [18]:
train_data_bitcoin.tail(10)

4297    0.030625
4298   -0.017401
4299    0.083823
4300   -0.010709
4301    0.009695
4302   -0.038421
4303    0.035711
4304   -0.047357
4305   -0.050564
4306   -0.025266
Name: Close, dtype: float64

In [19]:
test_data_bitcoin.head(10)

4307   -0.123322
4308   -0.002324
4309   -0.025881
4310   -0.059170
4311   -0.026623
4312    0.162752
4313   -0.037913
4314    0.150632
4315   -0.023393
4316    0.092819
Name: Close, dtype: float64

In [20]:
test_data_bitcoin.to_csv("TEST_bitcoin.csv", index=False)
train_data_bitcoin.to_csv("TRAIN_bitcoin.csv", index=False)
# Save the training and testing data to CSV files

In [21]:
def preroll_timeseries_df_named(df, history_len, predict_len, stride=1):
    """
    Rolls a univariate time series into overlapping windows.

    Args:
        df (pd.DataFrame): Input time series [T, 1].
        history_len (int): Number of historical time steps.
        predict_len (int): Number of future time steps.
        stride (int): Sliding step between windows.

    Returns:
        pd.DataFrame: Rolled matrix [history+predict rows, N samples columns]
    """
    # Try to infer variable name
    callers_local_vars = inspect.currentframe().f_back.f_locals.items()
    df_name = next((name for name, val in callers_local_vars if val is df), "df")

    # Prepare
    data = df.values.squeeze().astype('float32')  # [T]
    total_len = history_len + predict_len
    sequences = []

    # Sliding window
    for i in range(0, len(data) - total_len + 1, stride):
        window = data[i:i + total_len].reshape(-1, 1)  # [T, 1]
        sequences.append(window)

    # Stack windows side-by-side
    rolled_array = np.concatenate(sequences, axis=1)  # [T, N]
    ROLLED_df = pd.DataFrame(rolled_array)

    # Save
    filename = f"ROLLED_{df_name}.csv"
    ROLLED_df.to_csv(filename, index=False)

    print(f"✅ Saved: {filename}")
    print(f"📐 ROLLED_{df_name}.shape = {ROLLED_df.shape}")
    return ROLLED_df


In [22]:
preroll_timeseries_df_named(train_data_bitcoin, history_len=50, predict_len=20, stride=1)

✅ Saved: ROLLED_train_data_bitcoin.csv
📐 ROLLED_train_data_bitcoin.shape = (70, 4237)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4227,4228,4229,4230,4231,4232,4233,4234,4235,4236
0,-0.002756,-0.002031,-0.064693,-0.028159,0.000378,-0.007818,0.005509,-0.010407,-0.004463,0.009721,...,-0.041202,-0.001247,-0.007594,-0.010269,0.024857,-0.013175,-0.003065,0.020337,0.013940,0.001444
1,-0.002031,-0.064693,-0.028159,0.000378,-0.007818,0.005509,-0.010407,-0.004463,0.009721,-0.009537,...,-0.001247,-0.007594,-0.010269,0.024857,-0.013175,-0.003065,0.020337,0.013940,0.001444,0.000456
2,-0.064693,-0.028159,0.000378,-0.007818,0.005509,-0.010407,-0.004463,0.009721,-0.009537,0.018883,...,-0.007594,-0.010269,0.024857,-0.013175,-0.003065,0.020337,0.013940,0.001444,0.000456,-0.010302
3,-0.028159,0.000378,-0.007818,0.005509,-0.010407,-0.004463,0.009721,-0.009537,0.018883,-0.020953,...,-0.010269,0.024857,-0.013175,-0.003065,0.020337,0.013940,0.001444,0.000456,-0.010302,-0.008860
4,0.000378,-0.007818,0.005509,-0.010407,-0.004463,0.009721,-0.009537,0.018883,-0.020953,-0.047951,...,0.024857,-0.013175,-0.003065,0.020337,0.013940,0.001444,0.000456,-0.010302,-0.008860,0.043610
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,0.002910,0.003289,0.014713,-0.000701,-0.009604,0.012846,-0.020832,0.016448,-0.008838,0.000308,...,0.011033,-0.042198,-0.009352,-0.015179,0.030625,-0.017401,0.083823,-0.010709,0.009695,-0.038421
66,0.003289,0.014713,-0.000701,-0.009604,0.012846,-0.020832,0.016448,-0.008838,0.000308,-0.000425,...,-0.042198,-0.009352,-0.015179,0.030625,-0.017401,0.083823,-0.010709,0.009695,-0.038421,0.035711
67,0.014713,-0.000701,-0.009604,0.012846,-0.020832,0.016448,-0.008838,0.000308,-0.000425,0.000859,...,-0.009352,-0.015179,0.030625,-0.017401,0.083823,-0.010709,0.009695,-0.038421,0.035711,-0.047357
68,-0.000701,-0.009604,0.012846,-0.020832,0.016448,-0.008838,0.000308,-0.000425,0.000859,0.000078,...,-0.015179,0.030625,-0.017401,0.083823,-0.010709,0.009695,-0.038421,0.035711,-0.047357,-0.050564


In [23]:
preroll_timeseries_df_named(test_data_bitcoin, history_len=50, predict_len=20, stride=1)


✅ Saved: ROLLED_test_data_bitcoin.csv
📐 ROLLED_test_data_bitcoin.shape = (70, 1008)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,998,999,1000,1001,1002,1003,1004,1005,1006,1007
0,-0.123322,-0.002324,-0.025881,-0.059170,-0.026623,0.162752,-0.037913,0.150633,-0.023393,0.092819,...,0.038840,-0.028988,0.028988,-0.101137,0.001053,0.015666,-0.028378,-0.076407,-0.260065,-0.064738
1,-0.002324,-0.025881,-0.059170,-0.026623,0.162752,-0.037913,0.150633,-0.023393,0.092819,-0.016910,...,-0.028988,0.028988,-0.101137,0.001053,0.015666,-0.028378,-0.076407,-0.260065,-0.064738,-0.022545
2,-0.025881,-0.059170,-0.026623,0.162752,-0.037913,0.150633,-0.023393,0.092819,-0.016910,-0.003636,...,0.028988,-0.101137,0.001053,0.015666,-0.028378,-0.076407,-0.260065,-0.064738,-0.022545,-0.001630
3,-0.059170,-0.026623,0.162752,-0.037913,0.150633,-0.023393,0.092819,-0.016910,-0.003636,0.069260,...,-0.101137,0.001053,0.015666,-0.028378,-0.076407,-0.260065,-0.064738,-0.022545,-0.001630,-0.003268
4,-0.026623,0.162752,-0.037913,0.150633,-0.023393,0.092819,-0.016910,-0.003636,0.069260,0.024953,...,0.001053,0.015666,-0.028378,-0.076407,-0.260065,-0.064738,-0.022545,-0.001630,-0.003268,0.004898
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,0.031640,-0.029861,-0.140540,0.073545,0.106130,0.172385,0.039082,-0.074151,-0.047375,0.067220,...,-0.043803,0.043803,0.014185,-0.153447,-0.031696,0.054424,-0.021088,-0.067823,0.051293,0.000000
66,-0.029861,-0.140540,0.073545,0.106130,0.172385,0.039082,-0.074151,-0.047375,0.067220,0.034532,...,0.043803,0.014185,-0.153447,-0.031696,0.054424,-0.021088,-0.067823,0.051293,0.000000,0.018167
67,-0.140540,0.073545,0.106130,0.172385,0.039082,-0.074151,-0.047375,0.067220,0.034532,0.084468,...,0.014185,-0.153447,-0.031696,0.054424,-0.021088,-0.067823,0.051293,0.000000,0.018167,0.105524
68,0.073545,0.106130,0.172385,0.039082,-0.074151,-0.047375,0.067220,0.034532,0.084468,-0.072090,...,-0.153447,-0.031696,0.054424,-0.021088,-0.067823,0.051293,0.000000,0.018167,0.105524,-0.079675


In [24]:
ROLLED_test_bitcoin = pd.read_csv("ROLLED_train_data_bitcoin.csv")

ROLLED_test_bitcoin.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4227,4228,4229,4230,4231,4232,4233,4234,4235,4236
count,70.0,70.0,70.0,70.0,70.0,70.0,70.0,70.0,70.0,70.0,...,70.0,70.0,70.0,70.0,70.0,70.0,70.0,70.0,70.0,70.0
mean,0.00041,0.000632,0.000364,0.001523,0.001799,0.001798,0.001904,0.001837,0.001987,0.002261,...,-0.005322,-0.004982,-0.003767,-0.003812,-0.003526,-0.00443,-0.003732,-0.004365,-0.005378,-0.005938
std,0.031308,0.03134,0.031444,0.030492,0.030306,0.030306,0.030285,0.030282,0.030247,0.030274,...,0.028949,0.02866,0.030561,0.030569,0.030601,0.030685,0.031037,0.031472,0.031805,0.031805
min,-0.091361,-0.091361,-0.091361,-0.091361,-0.091361,-0.091361,-0.091361,-0.091361,-0.091361,-0.091361,...,-0.086814,-0.086814,-0.086814,-0.086814,-0.086814,-0.086814,-0.086814,-0.086814,-0.086814,-0.086814
25%,-0.012036,-0.012036,-0.012437,-0.012036,-0.011624,-0.011624,-0.011624,-0.011624,-0.011624,-0.011624,...,-0.017119,-0.017119,-0.017119,-0.017119,-0.017119,-0.017335,-0.017335,-0.01812,-0.019013,-0.021196
50%,-0.00102,-0.000162,-0.000162,0.001644,0.001644,0.001609,0.001609,0.000584,0.000584,0.001884,...,-0.004891,-0.004891,-0.004891,-0.004891,-0.003935,-0.004891,-0.003935,-0.004891,-0.005863,-0.007805
75%,0.017963,0.017963,0.017963,0.018274,0.018274,0.018274,0.018274,0.018274,0.018274,0.018274,...,0.015334,0.015334,0.016679,0.016679,0.016679,0.015334,0.016679,0.016679,0.015334,0.014607
max,0.088111,0.088111,0.088111,0.088111,0.088111,0.088111,0.088111,0.088111,0.088111,0.088111,...,0.076163,0.076163,0.083823,0.083823,0.083823,0.083823,0.083823,0.083823,0.083823,0.083823
