In [1]:
%load_ext autoreload
%autoreload 2

import os, sys, json, time, warnings
from pathlib import Path
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

In [2]:
# Paths (adjust if your layout differs)
ROOT = Path('/home/pduce/ICAIF_2025_Cryptocurrency_Forecasting_Starter_Kit')
DATA = ROOT / "data"
SRC  = ROOT / "src"
SUBM = ROOT / "sample_submission"

# Ensure src is importable
if str(SRC) not in sys.path:
    sys.path.insert(0, str(SRC))

# Create sample_submission dir if missing
SUBM.mkdir(parents=True, exist_ok=True)

SEED = 1337
np.random.seed(SEED)
torch.manual_seed(SEED)

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
DEVICE

'cpu'

In [3]:
# Load dataset files
info_path = DATA / "dataset_info.json"
if info_path.exists():
    info = json.loads(info_path.read_text(encoding="utf-8"))
    print("dataset_info.json loaded. Keys:", list(info.keys()))
    print(json.dumps({k: info[k] for k in ['features','input_len','horizon_len','outputs']}, indent=2))
else:
    print("dataset_info.json not found at", info_path)

# Peek train / x_test
train_path = DATA / "train.pkl"
x_test_path  = DATA / "x_test.pkl"
y_local_path = DATA / "y_test_local.pkl"

train = pd.read_pickle(train_path)
train['event_datetime'] = pd.to_datetime('2024-01-01') + train['time_step']*pd.Timedelta(minutes=1) 
x_test  = pd.read_pickle(x_test_path)
y_test_local = pd.read_pickle(y_local_path)

print("train shape:", train.shape, "| columns:", train.columns.tolist())
print("x_test  shape:", x_test.shape,  "| columns:", x_test.columns.tolist())
print("y_test_local shape:", y_test_local.shape, "| columns:", y_test_local.columns.tolist())

display(train.head(3))
display(x_test.head(3))
display(y_test_local.head(3))

dataset_info.json loaded. Keys: ['freq', 'features', 'input_len', 'horizon_len', 'dtypes', 'outputs', 'sha256']
{
  "features": [
    "close",
    "volume"
  ],
  "input_len": 60,
  "horizon_len": 10,
  "outputs": {
    "train": {
      "columns": [
        "series_id",
        "time_step",
        "close",
        "volume"
      ]
    },
    "x_test": {
      "columns": [
        "window_id",
        "time_step",
        "close",
        "volume"
      ]
    },
    "y_test_local": {
      "columns": [
        "window_id",
        "time_step",
        "close"
      ]
    }
  }
}
train shape: (18331224, 5) | columns: ['series_id', 'time_step', 'close', 'volume', 'event_datetime']
x_test  shape: (3000000, 4) | columns: ['window_id', 'time_step', 'close', 'volume']
y_test_local shape: (20, 3) | columns: ['window_id', 'time_step', 'close']


Unnamed: 0,series_id,time_step,close,volume,event_datetime
0,1,0,0.137,171985.703125,2024-01-01 00:00:00
1,1,1,0.13656,85451.398438,2024-01-01 00:01:00
2,1,2,0.13647,121151.898438,2024-01-01 00:02:00


Unnamed: 0,window_id,time_step,close,volume
0,1,0,0.1126,24976.0
1,1,1,0.1126,0.0
2,1,2,0.1125,2299.0


Unnamed: 0,window_id,time_step,close
0,1,0,0.1131
1,1,1,0.1131
2,1,2,0.113


In [4]:
import numpy as np
import pandas as pd
from numpy.lib.stride_tricks import sliding_window_view as swv

class WindowsDatasetVect:
    """
    Vectorized window builder that returns two DataFrames:

    X columns:
      - window_id: integer id of each window
      - time_step: 0..(input_len-1) within the input segment
      - close
      - volume
      - event_datetime

    y columns:
      - window_id: same ids as in X
      - time_step: 0..(horizon_len-1) within the future horizon
      - close: future close
      - prev_close: the last input close (index input_len-1, e.g. 59 when input_len=60)
      - event_datetime: timestamps of the future horizon

    Notes:
      * Requires df to have columns:
          ['series_id','time_step','close','volume','event_datetime']
      * Windows are created per series_id after sorting by time_step.
    """
    def __init__(
        self,
        df : pd.DataFrame = None,
        train_path: str = None,
        window: int = 70,
        input_len: int = 60,
        horizon_len: int = 10,
        rolling: bool = True,
        step_size: int | None = None,
    ) -> None:
        assert input_len + horizon_len == window, "window must equal input_len + horizon_len"
        # default stepping: 1 if rolling, else full non-overlapping windows
        if step_size is None:
            step_size = 1 if rolling else window

        if train_path is not None:
            df = pd.read_pickle(train_path)
        if df is None:
            raise ValueError("Provide either df or train_path")

        required = {'series_id','time_step','close','volume','event_datetime'}
        if not required.issubset(df.columns):
            raise ValueError(f"df missing required columns {required}, found {list(df.columns)}")

        # group per series, sorted by time_step
        groups = {
            sid: g.sort_values(['event_datetime', 'time_step']).reset_index(drop=True)
            for sid, g in df.groupby('series_id')
        }

        X_parts: list[pd.DataFrame] = []
        Y_parts: list[pd.DataFrame] = []

        next_win_id = 0

        for _, g in groups.items():
            n = len(g)
            if n < window:
                continue

            close  = g['close' ].to_numpy(np.float32)
            volume = g['volume'].to_numpy(np.float32)
            dt     = g['event_datetime'].to_numpy('datetime64[ns]')

            # sliding windows (shape: (n - window + 1, window))
            w_close  = swv(close,  window_shape=window)[::step_size]
            w_volume = swv(volume, window_shape=window)[::step_size]
            w_dt     = swv(dt,     window_shape=window)[::step_size]

            num_win = w_close.shape[0]
            if num_win == 0:
                continue

            # split into input and horizon
            x_close  = w_close[:,  :input_len]                      # (num_win, input_len)
            x_volume = w_volume[:, :input_len]
            x_dt     = w_dt[:,     :input_len]

            y_close = w_close[:,  input_len:]                       # (num_win, horizon_len)
            y_dt    = w_dt[:,     input_len:]
            prev_c  = x_close[:, -1]                                # (num_win,)

            # window ids for this group
            win_ids = np.arange(next_win_id, next_win_id + num_win, dtype=np.int64)

            # X dataframe chunk
            X_parts.append(pd.DataFrame({
                "window_id":     np.repeat(win_ids, input_len)+1,
                "time_step":     np.tile(np.arange(input_len, dtype=np.int32), num_win),
                "close":         x_close.reshape(-1),
                "volume":        x_volume.reshape(-1),
                "event_datetime": x_dt.reshape(-1),
            }))

            # y dataframe chunk
            Y_parts.append(pd.DataFrame({
                "window_id":     np.repeat(win_ids, horizon_len)+1,
                "time_step":     np.tile(np.arange(horizon_len, dtype=np.int32), num_win),
                "close":         y_close.reshape(-1),
                "prev_close":    np.repeat(prev_c, horizon_len),
                "event_datetime": y_dt.reshape(-1),
            }))

            next_win_id += num_win

        # Public attributes
        if X_parts:
            self.X = pd.concat(X_parts, ignore_index=True)
        else:
            self.X = pd.DataFrame(columns=["window_id","time_step","close","volume","event_datetime"])

        if Y_parts:
            self.y = pd.concat(Y_parts, ignore_index=True)
        else:
            self.y = pd.DataFrame(columns=["window_id","time_step","close","prev_close","event_datetime"])

        self.num_windows = int(self.X["window_id"].max()) if len(self.X) else 0
        self.input_len = input_len
        self.horizon_len = horizon_len
        self.window = window
        self.step_size = step_size

    def __len__(self) -> int:
        return self.num_windows

    def windows(self, window_id: int) -> tuple[pd.DataFrame, pd.DataFrame]:
        """Convenience: return (X_rows, y_rows) for a given window_id."""
        Xw = self.X[self.X["window_id"] == window_id].sort_values("time_step")
        Yw = self.y[self.y["window_id"] == window_id].sort_values("time_step")
        return Xw, Yw


In [86]:
import copy
import numpy as np
import torch
from torch.utils.data import Dataset
import pandas as pd
from typing import Dict, Any, List, Optional, Iterable, Callable, Tuple
from concurrent.futures import ProcessPoolExecutor, as_completed

# --- your existing imports ---
from icaif.dataset import TrainWindowSampler, TrainWindowSamplerVect
from athenea.stats.regressions import Ridge
from icaif.metrics import evaluate_all_metrics

SEED = 42  # ensure you define this somewhere

def _features_raw(X):
    """
    X: array-like of shape (n_samples, 60, 2)
       [:, :, 0] = prices (close); [:, :, 1] = volumes
    Returns: list[pd.DataFrame], each of shape (n_samples, 1)
    """
    # Arrange as (time x samples)
    X_prices  = pd.DataFrame(X[:, :, 0]).T
    X_volumes = pd.DataFrame(X[:, :, 1]).T

    logp    = np.log(X_prices)
    logrets = logp.diff()  # 1-min log returns, time on rows
    vol_lr  = logrets.mul(X_volumes, axis=0)

    cs_price     = logrets.fillna(0).cumsum()
    # cs_price_vol = vol_lr.fillna(0).cumsum()  # kept if you need in the future

    avg_lr            = logrets.mean().to_frame()
    sign_change_share = np.sign(logrets).diff().ne(0).sum().to_frame()  # computed but not used below
    avg_vol_lr        = vol_lr.mean().to_frame()
    vd                = np.sign(logrets).mul(X_volumes, axis=0).mean().to_frame()
    rv                = logrets.pow(2).mean().to_frame()

    features = [
        avg_lr,
        avg_vol_lr,
        vd,
        rv,
    ]

    for lag in [3, 5, 10, 20, 30, 40, 50, 60]:
        r_tau = cs_price.iloc[-lag:].sum().to_frame(0)
        features.append(r_tau)

    return features

# (Optional) keep old name, but make it a thin wrapper if someone still calls transform()
def transform(X):
    feats_tr, _ = transform_fit(X)   # fit-once wrapper below
    return feats_tr

# --- NEW: scaler helpers (fit on train, apply to others) ---
from typing import List, Tuple

Scaler = List[Tuple[float, float]]  # per-feature (mean, std)

def _fit_zscore_scaler(features: List[pd.DataFrame]) -> Scaler:
    scaler: Scaler = []
    for f in features:
        # each f is (n_samples x 1)
        mu = float(f.mean().iloc[0])
        sd = float(f.std().iloc[0]) + 1e-12
        scaler.append((mu, sd))
    return scaler

def _apply_zscore(features: List[pd.DataFrame], scaler: Scaler) -> List[pd.DataFrame]:
    out = []
    for f, (mu, sd) in zip(features, scaler):
        out.append((f - mu) / sd)
    return out

def transform_fit(X) -> Tuple[List[pd.DataFrame], Scaler]:
    """Build raw features, fit scaler on them, return zscored features + scaler."""
    feats = _features_raw(X)
    sc = _fit_zscore_scaler(feats)
    return _apply_zscore(feats, sc), sc

def transform_apply(X, scaler: Scaler) -> List[pd.DataFrame]:
    """Build raw features and apply an already-fitted scaler."""
    feats = _features_raw(X)
    return _apply_zscore(feats, scaler)

from features_compute import build_features_np

def transform_nick(X):
    return [pd.DataFrame(f) for f in build_features_np(X)]

from icaif.metrics_np import evaluate_all_metrics_vectorized

def evaluate(model_results, y_true):
    """
    Backward-compatible evaluator for single-predict case.
    If you pass a 1D Series/DataFrame of window-level log-returns,
    it repeats each log-return across the 10 future steps.
    """
    y_pred = y_true.copy(deep=True).reset_index(drop=True)

    # Accept numpy array, Series or single-column DataFrame
    if isinstance(model_results, (pd.Series, pd.DataFrame)):
        mr = np.asarray(model_results).reshape(-1)
    else:
        mr = np.asarray(model_results).reshape(-1)

    # Repeat each window prediction for 10 steps, cum-sum in log space, exponentiate
    repeated = np.repeat(mr[:, None], 10, axis=1)                 # (n_windows, 10)
    factors  = np.exp(np.cumsum(repeated, axis=1))                # multiplicative factors
    # prev_close must be aligned per-window first step
    prev_close = (
        y_true.sort_values(['window_id', 'time_step'])
              .set_index(['window_id','time_step'])[['prev_close']]
              .to_numpy().reshape(-1, 10)[:, 0]
    )
    pred_close = (factors * prev_close[:, None]).reshape(-1)
    y_pred['pred_close'] = pred_close

    results = evaluate_all_metrics_vectorized(
        y_true=y_true.sort_values(['window_id','time_step']).reset_index(drop=True),
        y_pred=y_pred
    )
    return results

# --------------- NEW/UPDATED: worker function ---------------
def _run_one_fold(
    idx: int,
    train_ids: np.ndarray,
    val_ids: np.ndarray,
    df_train: pd.DataFrame,
    model_or_factory: Any,
    single_predict: bool = True,
) -> Tuple[int, str, Dict[str, Any], Any, Any]:
    """
    Execute a single fold end-to-end and return (idx, fold_name, metrics, model(s), fit_artifacts).
    Runs in a separate process when parallelized.

    If single_predict=True: one model on mean log-return (existing behavior).
    If single_predict=False: train 10 separate models, one per future-step log-return.
    """
    # Rebuild model per process
    base_model = model_or_factory() if callable(model_or_factory) else copy.deepcopy(model_or_factory)

    # Slice data for this fold
    df_tr = df_train[df_train['series_id'].isin(train_ids)].copy()
    df_va = df_train[df_train['series_id'].isin(val_ids)].copy()

    # Defensive: skip degenerate folds
    if df_tr.empty or df_va.empty:
        return idx, f"val_{'-'.join(map(str, val_ids))}", {}, None, None

    train_ds = WindowsDatasetVect(df_tr, step_size=70)
    val_ds   = WindowsDatasetVect(df_va, step_size=70)

    # Sort to ensure stable window/time ordering
    X_train = train_ds.X.sort_values(['window_id','time_step'])
    y_train = train_ds.y.sort_values(['window_id','time_step'])
    X_val   = val_ds.X.sort_values(['window_id','time_step'])
    y_val   = val_ds.y.sort_values(['window_id','time_step'])

    # Build tensors/matrices
    X_np_train = (
        X_train.set_index(['window_id','time_step'])[['close','volume']]
               .to_numpy().reshape(-1, 60, 2)
    )
    X_np_val = (
        X_val.set_index(['window_id','time_step'])[['close','volume']]
             .to_numpy().reshape(-1, 60, 2)
    )

    # For train targets
    y_np_train_closes = y_train.set_index(['window_id','time_step'])[['close']].to_numpy().reshape(-1, 10)
    prev_close_train  = y_train.set_index(['window_id','time_step'])[['prev_close']].to_numpy().reshape(-1, 10)[:, 0]
    y_np_train = np.concatenate([prev_close_train[:, None], y_np_train_closes], axis=1)  # (n_windows, 11)

    # For evaluation on val we’ll need prev_close per window
    prev_close_val = y_val.set_index(['window_id','time_step'])[['prev_close']].to_numpy().reshape(-1, 10)[:, 0]

    # Features (fit on train, apply to val)
    features_train, scaler = transform_fit(X_np_train)
    features_val           = transform_apply(X_np_val, scaler)

    fold_name = f"val_{'-'.join(map(str, val_ids))}"

    if single_predict:
        # --- existing behavior: average log-returns across the 10 steps ---
        y_lr_train = pd.Series(np.diff(np.log(y_np_train), axis=1).mean(axis=1)).to_frame()

        model = base_model
        fit_artifacts = model.fit(y_lr_train, features_train)
        # Predict per-window mean log-return for validation set
        y_lr_pred = model.predict(features_val)  # (n_val_windows, 1-like)

        # Compose a 10-step path by repeating each window's predicted mean
        y_true_sorted = y_val.reset_index(drop=True)
        # Use the helper 'evaluate' which handles single-predict case
        metrics = evaluate(y_lr_pred, y_true_sorted)

        return idx, fold_name, metrics, model, fit_artifacts

    else:
        # --- NEW: 10 separate models, one per horizon log-return ---
        y_lr_all = np.diff(np.log(y_np_train), axis=1)   # (n_windows, 10)
        n_steps = y_lr_all.shape[1]
        models_per_step = []
        fits_per_step   = []
        preds_per_step  = []

        # Helper to create a fresh model each step
        def _fresh_model():
            return model_or_factory() if callable(model_or_factory) else copy.deepcopy(model_or_factory)

        for j in range(n_steps):
            y_lr_j = pd.Series(y_lr_all[:, j]).to_frame()  # (n_windows, 1)
            m_j = _fresh_model()
            fit_j = m_j.fit(y_lr_j, features_train)
            y_pred_j = m_j.predict(features_val)           # shape (n_val_windows,)

            models_per_step.append(m_j)
            fits_per_step.append(fit_j)
            preds_per_step.append(np.asarray(y_pred_j).reshape(-1))

        # Stack predictions into (n_val_windows, 10)
        y_lr_pred_steps = np.column_stack(preds_per_step)
        # Turn log-returns into predicted closes per step
        factors = np.exp(np.cumsum(y_lr_pred_steps, axis=1))          # (n_val_windows, 10)
        pred_close_matrix = factors * prev_close_val[:, None]          # (n_val_windows, 10)
        pred_close_flat   = pred_close_matrix.reshape(-1)              # match long format

        # Build y_pred aligned to y_val (which we already sorted)
        y_true_sorted = y_val.reset_index(drop=True).copy()
        y_pred_sorted = y_true_sorted.copy()
        y_pred_sorted['pred_close'] = pred_close_flat

        # Vectorized metrics
        metrics = evaluate_all_metrics_vectorized(
            y_true=y_true_sorted,
            y_pred=y_pred_sorted,
        )

        return idx, fold_name, metrics, models_per_step, fits_per_step


# --------------- UPDATED: run_cv with parallelism + single_predict flag ---------------
def run_cv(
    df_train: pd.DataFrame,
    n_train: int = 4,
    model=None,
    *,
    include_last_fold: bool = False,   # set True to include the final possible window
    n_jobs: int = 1,                   # #processes; 1 keeps it sequential
    model_factory: Optional[Callable[[], Any]] = None,  # pass to rebuild model per fold
    single_predict: bool = True,       # NEW: True = current code; False = 10 models (one per horizon)
) -> pd.DataFrame:
    """
    Rolling group-based CV, optionally parallel across folds (process-based).

    - If single_predict=True: train one model per fold on the mean of per-step log-returns (existing behavior).
    - If single_predict=False: train 10 separate models per fold, one for each horizon log-return.

    Assumptions about `model` / `model_factory`:
      - Either:
          model_factory() -> fresh model with fit/predict methods
        Or:
          model is a picklable object with those methods (we deep-copy it per fold).
    """
    if (model is None) and (model_factory is None):
        raise ValueError("Provide either `model` or a `model_factory` (callable returning a fresh model).")

    series_ids = df_train['series_id'].unique()
    n_series   = len(series_ids)

    if n_train < 1:
        raise ValueError(f"n_train must be ≥ 1 (got {n_train}).")

    start = 0
    stop  = n_series + (1 if include_last_fold else 0)

    # Build fold definitions once
    fold_specs = []
    for i in range(start, stop):
        train_ids = series_ids[i : i + n_train]
        # We validate in the remaining series
        val_ids   = [srs_id for srs_id in series_ids if srs_id not in train_ids]
        fold_specs.append((i, train_ids, val_ids))

    if not fold_specs:
        raise RuntimeError("No folds were produced. Check your data and parameters.")

    # Choose what to pass to workers for model creation
    model_or_factory = model_factory if model_factory is not None else model

    # Sequential path (n_jobs == 1)
    if n_jobs == 1:
        results = [
            _run_one_fold(idx, train_ids, val_ids, df_train, model_or_factory, single_predict=single_predict)
            for (idx, train_ids, val_ids) in fold_specs
        ]
    else:
        # Parallel path (processes)
        # Tip: to avoid CPU over-subscription with BLAS, consider setting env vars:
        # OMP_NUM_THREADS=1 MKL_NUM_THREADS=1 NUMEXPR_NUM_THREADS=1
        results = [None] * len(fold_specs)
        with ProcessPoolExecutor(max_workers=n_jobs) as ex:
            futures = {
                ex.submit(_run_one_fold, idx, train_ids, val_ids, df_train, model_or_factory, single_predict): pos
                for pos, (idx, train_ids, val_ids) in enumerate(fold_specs)
            }
            for fut in as_completed(futures):
                pos = futures[fut]
                results[pos] = fut.result()

    # results: list of (idx, fold_name, metrics, model(s), fit_artifacts)
    # Keep chronological order by fold index
    results.sort(key=lambda t: t[0])
    fold_names   = [name for _, name, _, _, _ in results]
    metric_rows  = [metrics for _, _, metrics, _, _ in results]
    models       = [mdl for _, _, _, mdl, _ in results]              # may be a model or a list[models]
    fit_artifacts = [art for _, _, _, _, art in results]             # may be any / list

    # Assemble DataFrame: rows = metric names, cols = folds
    metrics_df = pd.DataFrame(metric_rows, index=fold_names).T
    return metrics_df, models, fit_artifacts

In [96]:
%load_ext autoreload
%autoreload 2

from athenea.stats.regressions import Ridge
from icaif.metrics_np import evaluate_all_metrics_vectorized
def make_ridge(l2=0.1):
    model = Ridge(l2=l2)
    model.transform = transform
    model.evaluate = evaluate
    return model


metrics, models, model_results = run_cv(
    df_train=train,
    n_train=30,
    include_last_fold=True,
    n_jobs=4,
    model_factory=make_ridge(l2=0.1),
    single_predict=True
)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [95]:
#False
metrics.median(axis=1)

MSE            21.934642
MAE             0.593613
IC              0.042382
IR              0.136839
SharpeRatio     0.051665
MDD             0.754754
VaR            -0.006733
ES             -0.011325
dtype: float64

In [97]:
#True
metrics.median(axis=1)

MSE            21.927940
MAE             0.594394
IC              0.050940
IR              0.154523
SharpeRatio     0.053322
MDD             0.762495
VaR            -0.006785
ES             -0.011387
dtype: float64

In [48]:
model = make_ridge(l2=10000)
train_ds = WindowsDatasetVect(train, step_size=70)

In [49]:
X_train = train_ds.X
y_train = train_ds.y


X_np_train = X_train.set_index(['window_id','time_step'])[['close','volume']].to_numpy().reshape(-1, 60, 2)
y_np_train = y_train.set_index(['window_id','time_step'])[['close']].to_numpy().reshape(-1, 10)

y_lr_train = pd.Series(np.diff(np.log(y_np_train),axis=1).mean(axis=1)).to_frame()

features = model.transform(X_np_train)

model_results = model.fit(y_lr_train, features)

In [50]:
X_np_test = x_test[['close','volume']].to_numpy().reshape(-1, 60, 2)

features_test = model.transform(X_np_test)

y_pred = model.predict(features_test)

In [51]:
y_pred_test = x_test[x_test['time_step']==59].drop(columns=['time_step']).copy(deep=True)

In [52]:
y_test_pred = pd.DataFrame(
    {
        'window_id': np.repeat(x_test['window_id'].unique(), 10),
        'time_step': np.tile(np.arange(10, dtype=np.int32), len(x_test['window_id'].unique())),
        'prev_close': np.repeat(x_test[x_test['time_step']==59]['close'], 10).values,
        'pred_ret' : np.exp(y_pred.loc[y_pred.index.repeat(10)].groupby(level=0).cumsum()).reset_index(drop=True)[0],
    }
)

y_test_pred['pred_close'] = y_test_pred['pred_ret'] * y_test_pred['prev_close']
y_test_pred = y_test_pred[['window_id','time_step','pred_close']]

In [54]:
import pickle
with open('/home/pduce/ICAIF_2025_Cryptocurrency_Forecasting_Starter_Kit/research/ridge/submissions/l2_10000_nonoverlapping/submission.pkl', 'wb') as f:
    pickle.dump(y_test_pred, f)

with open('/home/pduce/ICAIF_2025_Cryptocurrency_Forecasting_Starter_Kit/research/ridge/submissions/l2_10000_nonoverlapping/model_weights.pkl', 'wb') as f:
    pickle.dump(model, f)


In [53]:
y_test_pred

Unnamed: 0,window_id,time_step,pred_close
0,1,0,0.113098
1,1,1,0.113096
2,1,2,0.113094
3,1,3,0.113092
4,1,4,0.113090
...,...,...,...
499995,50000,5,0.169876
499996,50000,6,0.169872
499997,50000,7,0.169868
499998,50000,8,0.169863


In [14]:
y_test_pred

Unnamed: 0,window_id,time_step,pred_close
0,1,0,0.113100
1,1,1,0.113099
2,1,2,0.113099
3,1,3,0.113098
4,1,4,0.113098
...,...,...,...
499995,50000,5,0.169890
499996,50000,6,0.169888
499997,50000,7,0.169886
499998,50000,8,0.169885


In [49]:
y_test_pred

Unnamed: 0,window_id,time_step,pred_close
0,1,0,0.113100
1,1,1,0.113099
2,1,2,0.113099
3,1,3,0.113099
4,1,4,0.113099
...,...,...,...
499995,50000,5,0.169888
499996,50000,6,0.169886
499997,50000,7,0.169885
499998,50000,8,0.169883
