# Timestamp Prediction: Compliance-First Multi-Model Benchmark

In [1]:
# Imports and data loading
import os
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error
from scipy.stats import pearsonr
import lightgbm as lgb
import xgboost as xgb
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


In [2]:
# Load data
root = Path('..').resolve()
data_dir = root / 'data'
train_path = data_dir / 'train.csv'
test_path = data_dir / 'test.csv'
assert train_path.exists() and test_path.exists(), 'train.csv or test.csv not found'
train_df = pd.read_csv(train_path)
test_df = pd.read_csv(test_path)
train_df['Timestamp'] = pd.to_datetime(train_df['Timestamp'])
test_df['Timestamp'] = pd.to_datetime(test_df['Timestamp'])
train_df = train_df.sort_values('Timestamp').reset_index(drop=True)
test_df = test_df.sort_values('Timestamp').reset_index(drop=True)
print(train_df.head())
print(test_df.head())

            Timestamp  Open  High   Low  Close  Volume  Target
0 2012-01-01 10:00:00  4.58  4.58  4.58   4.58     0.0     0.0
1 2012-01-01 10:15:00  4.58  4.58  4.58   4.58     0.0     0.0
2 2012-01-01 10:30:00  4.58  4.58  4.58   4.58     0.0     0.0
3 2012-01-01 10:45:00  4.58  4.58  4.58   4.58     0.0     0.0
4 2012-01-01 11:00:00  4.58  4.58  4.58   4.58     0.0     0.0
            Timestamp      Open      High       Low     Close     Volume
0 2025-10-23 23:30:00  110113.0  110113.0  110001.0  110093.0   5.994213
1 2025-10-23 23:45:00  110093.0  110111.0  110003.0  110111.0   5.027084
2 2025-10-24 00:00:00  110110.0  110278.0  110033.0  110267.0  29.892445
3 2025-10-24 00:15:00  110228.0  110463.0  110180.0  110197.0  16.283404
4 2025-10-24 00:30:00  110197.0  110517.0  110169.0  110418.0   8.827779


# Data preprocessing and feature engineering
We build leakage-free temporal features and a log-return target. No test leakage or sign flipping is used; direction is fixed.

In [3]:
# Feature engineering utilities
def add_time_features(df: pd.DataFrame) -> pd.DataFrame:
    out = df.copy()
    out['hour'] = out['Timestamp'].dt.hour
    out['day'] = out['Timestamp'].dt.day
    out['weekday'] = out['Timestamp'].dt.weekday
    out['month'] = out['Timestamp'].dt.month
    return out

def add_price_features(df: pd.DataFrame, max_lag: int = 16) -> pd.DataFrame:
    out = df.copy()
    out['ret_1'] = np.log(out['Close'] / out['Close'].shift(1))
    for lag in [1,2,3,4,6,8,12,16]:
        out[f'lag_close_{lag}'] = out['Close'].shift(lag)
        out[f'lag_ret_{lag}'] = out['ret_1'].shift(lag)
    for w in [4,8,16,32]:
        out[f'roll_ret_mean_{w}'] = out['ret_1'].rolling(w).mean()
        out[f'roll_ret_std_{w}'] = out['ret_1'].rolling(w).std()
        out[f'roll_close_mean_{w}'] = out['Close'].rolling(w).mean()
        out[f'roll_close_std_{w}'] = out['Close'].rolling(w).std()
    out = out.replace([np.inf, -np.inf], np.nan)
    return out

def build_dataset(df: pd.DataFrame, is_train: bool = True):
    df = add_time_features(df)
    df = add_price_features(df)
    if is_train:
        # competition-compliant target: log-return to next step using only train data
        df['Target'] = np.log(df['Close'].shift(-1) / df['Close'])
        df = df.iloc[:-1]  # last row has no next close
    feature_cols = [c for c in df.columns if c not in ['Timestamp','Open','High','Low','Close','Volume','Target']]
    df = df.dropna(subset=feature_cols + (['Target'] if is_train else []))
    return df, feature_cols

# Build train/val sets
train_feat, feature_cols = build_dataset(train_df, is_train=True)
X = train_feat[feature_cols].values
y = train_feat['Target'].values
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, shuffle=False)
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_val_s = scaler.transform(X_val)
test_feat, _ = build_dataset(test_df, is_train=False)
X_test_s = scaler.transform(test_feat[feature_cols].values)
print(f'Train rows: {len(X_train)}, Val rows: {len(X_val)}, Test rows: {len(X_test_s)}')

Train rows: 387335, Val rows: 96834, Test rows: 2849


# Baseline compliance method
A simple ridge regression on engineered features, fixed direction (no sign flip).

In [5]:
# Baseline ridge regression
def evaluate(pred, name):
    pearson = pearsonr(pred, y_val)[0]
    mae = mean_absolute_error(y_val, pred)
    mse = mean_squared_error(y_val, pred)
    rmse = np.sqrt(mse)
    print(f"{name}: Pearson={pearson:.5f} MAE={mae:.6f} RMSE={rmse:.6f}")
    return pearson, mae, rmse

ridge = Ridge(alpha=1.0, random_state=42)
ridge.fit(X_train_s, y_train)
val_pred_ridge = ridge.predict(X_val_s)
evaluate(val_pred_ridge, 'Ridge baseline')

Ridge baseline: Pearson=0.00547 MAE=0.001601 RMSE=0.002513


(np.float64(0.00547324182578654),
 0.001600591550523547,
 np.float64(0.0025130183195143643))

# LightGBM model
Train a gradient boosted trees model tuned for small learning rate and moderate depth.

In [6]:
# Train LightGBM
lgb_train = lgb.Dataset(X_train_s, label=y_train)
lgb_val = lgb.Dataset(X_val_s, label=y_val, reference=lgb_train)
lgb_params = {
    'objective': 'regression',
    'metric': 'rmse',
    'learning_rate': 0.01,
    'num_leaves': 63,
    'max_depth': 6,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'min_child_samples': 50,
    'reg_alpha': 0.1,
    'reg_lambda': 0.1,
    'verbosity': -1,
    'seed': 42,
}
lgb_model = lgb.train(
    lgb_params,
    lgb_train,
    num_boost_round=1200,
    valid_sets=[lgb_val],
    callbacks=[lgb.early_stopping(80, verbose=False)]
)
val_pred_lgb = lgb_model.predict(X_val_s)
evaluate(val_pred_lgb, 'LightGBM')

LightGBM: Pearson=0.03278 MAE=0.001579 RMSE=0.002493


(np.float64(0.03278381182759346),
 0.0015785081800397814,
 np.float64(0.0024930491707538037))

# XGBoost model
Parallel gradient boosting baseline.

In [7]:
# Train XGBoost
dtrain = xgb.DMatrix(X_train_s, label=y_train)
dval = xgb.DMatrix(X_val_s, label=y_val)
xgb_params = {
    'objective': 'reg:squarederror',
    'eval_metric': 'rmse',
    'learning_rate': 0.01,
    'max_depth': 6,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'lambda': 0.1,
    'alpha': 0.1,
    'seed': 42,
}
xgb_model = xgb.train(
    xgb_params,
    dtrain,
    num_boost_round=1500,
    evals=[(dval, 'val')],
    early_stopping_rounds=100,
    verbose_eval=False
)
val_pred_xgb = xgb_model.predict(xgb.DMatrix(X_val_s))
evaluate(val_pred_xgb, 'XGBoost')

XGBoost: Pearson=0.02633 MAE=0.001579 RMSE=0.002494


(np.float64(0.02632717586681382),
 0.0015792742906270947,
 np.float64(0.002493501782157594))

# LightGBM + XGBoost ensemble
Weighted averaging without sign flipping.

In [8]:
# Ensemble
w_lgb, w_xgb = 0.5, 0.5
val_pred_ens = w_lgb * val_pred_lgb + w_xgb * val_pred_xgb
evaluate(val_pred_ens, 'Ensemble (LGB+XGB)')

Ensemble (LGB+XGB): Pearson=0.03062 MAE=0.001579 RMSE=0.002493


(np.float64(0.03061500297118868),
 0.0015788541271964068,
 np.float64(0.002493188393189832))

# Bidirectional LSTM model
Sequence-to-one forecasting on scaled features.

In [9]:
# Prepare sequences for BiLSTM
def make_sequences(X_arr, y_arr, lookback=32):
    X_seq, y_seq = [], []
    for i in range(lookback, len(X_arr)):
        X_seq.append(X_arr[i-lookback:i])
        y_seq.append(y_arr[i])
    return np.array(X_seq), np.array(y_seq)

lookback = 32
X_train_seq, y_train_seq = make_sequences(X_train_s, y_train, lookback)
X_val_seq, y_val_seq = make_sequences(np.vstack([X_train_s[-lookback:], X_val_s]), np.concatenate([y_train[-lookback:], y_val]), lookback)
print(X_train_seq.shape, X_val_seq.shape)

tf.random.set_seed(42)
bilstm = keras.Sequential([
    layers.Input(shape=(lookback, X_train_s.shape[1])),
    layers.Bidirectional(layers.LSTM(64, return_sequences=True)),
    layers.Dropout(0.1),
    layers.Bidirectional(layers.LSTM(32)),
    layers.Dense(32, activation='relu'),
    layers.Dense(1)
])
bilstm.compile(optimizer=keras.optimizers.Adam(1e-3), loss='mse')
history = bilstm.fit(
    X_train_seq, y_train_seq,
    validation_data=(X_val_seq, y_val_seq),
    epochs=20,
    batch_size=256,
    verbose=1,
    callbacks=[keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)]
)
val_pred_bilstm = bilstm.predict(X_val_seq, verbose=0).reshape(-1)
evaluate(val_pred_bilstm, 'BiLSTM')

(387303, 32, 37) (96834, 32, 37)
BiLSTM: Pearson=0.02597 MAE=0.001644 RMSE=0.002539
BiLSTM: Pearson=0.02597 MAE=0.001644 RMSE=0.002539


(np.float64(0.025973121079580597),
 0.0016442828235145928,
 np.float64(0.002538985404502768))

# Model evaluation and comparison
Collect validation metrics and choose the top performer (by Pearson).

In [11]:
# Compare and select best model
results = []
def add_result(name, val_pred):
    pearson = pearsonr(val_pred, y_val)[0]
    mae = mean_absolute_error(y_val, val_pred)
    mse = mean_squared_error(y_val, val_pred)  # older sklearn compatibility
    rmse = np.sqrt(mse)
    results.append({'model': name, 'pearson': pearson, 'mae': mae, 'rmse': rmse})

add_result('Ridge', val_pred_ridge)
add_result('LightGBM', val_pred_lgb)
add_result('XGBoost', val_pred_xgb)
add_result('Ensemble_LGB_XGB', val_pred_ens)
add_result('BiLSTM', val_pred_bilstm)
results_df = pd.DataFrame(results).sort_values('pearson', ascending=False)
display(results_df)
best_model = results_df.iloc[0]['model']
print(f'Best model by Pearson: {best_model}')

# Prepare test predictions for each model
test_pred_ridge = ridge.predict(X_test_s)
test_pred_lgb = lgb_model.predict(X_test_s)
test_pred_xgb = xgb_model.predict(xgb.DMatrix(X_test_s))
test_pred_ens = w_lgb * test_pred_lgb + w_xgb * test_pred_xgb

# BiLSTM test prediction
def make_test_sequences(X_hist, X_future, lookback=32):
    concat = np.vstack([X_hist, X_future])
    X_seq = []
    for i in range(len(X_hist), len(concat)):
        start = i - lookback
        end = i
        if start >= 0:
            X_seq.append(concat[start:end])
    return np.array(X_seq)

X_hist = X_train_s[-lookback:]
X_test_seq = make_test_sequences(X_hist, X_test_s, lookback)
test_pred_bilstm = bilstm.predict(X_test_seq, verbose=0).reshape(-1)

test_preds_map = {
    'Ridge': test_pred_ridge,
    'LightGBM': test_pred_lgb,
    'XGBoost': test_pred_xgb,
    'Ensemble_LGB_XGB': test_pred_ens,
    'BiLSTM': test_pred_bilstm,
}

best_test_pred = test_preds_map[best_model]
submission = pd.DataFrame({'row_id': np.arange(len(best_test_pred)), 'Target': best_test_pred})
sub_path = root / 'submissions' / 'timestamp_prediction_submission.csv'
sub_path.parent.mkdir(parents=True, exist_ok=True)
submission.to_csv(sub_path, index=False)
print(f'Saved submission: {sub_path}')

Unnamed: 0,model,pearson,mae,rmse
1,LightGBM,0.032784,0.001579,0.002493
3,Ensemble_LGB_XGB,0.030615,0.001579,0.002493
2,XGBoost,0.026327,0.001579,0.002494
4,BiLSTM,0.025973,0.001644,0.002539
0,Ridge,0.005473,0.001601,0.002513


Best model by Pearson: LightGBM
Saved submission: E:\github\crypto_forecast\submissions\timestamp_prediction_submission.csv
Saved submission: E:\github\crypto_forecast\submissions\timestamp_prediction_submission.csv
