# Bitcoin 5m Direction Prediction with LSM + Mixed GeoSparse (psann==0.12.3)

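This notebook downloads 30 days of 5-minute BTC-USD candles from Yahoo Finance (via `yfinance`), builds a binary direction target (`future_close > current_close`, where `future_close` is the close 3 bars, i.e. 15 minutes, ahead), and trains a `GeoSparseRegressor` with `activation_type='mixed'` and an LSM expander preprocessor.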


In [None]:
%pip install -q psann==0.12.3 yfinance pandas scikit-learn matplotlib


In [None]:
import random
import numpy as np
import pandas as pd
import torch
import yfinance as yf
from sklearn.preprocessing import StandardScaler

# Experiment configuration: one seed shared by every RNG, plus the market-data query.
SEED = 42
TICKER, INTERVAL, PERIOD = 'BTC-USD', '5m', '30d'
HORIZON_BARS = 3  # 3 x 5m = 15m ahead direction

# Seed Python, NumPy and PyTorch with the same value so reruns are repeatable.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(SEED)
del _seed_fn  # do not leave the loop helper behind in the notebook namespace
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

# Fetch the candles for TICKER at the configured INTERVAL / PERIOD.
raw = yf.download(TICKER, interval=INTERVAL, period=PERIOD, auto_adjust=False, progress=False)
if raw.empty:
    raise RuntimeError('No data returned. Try a different period or rerun.')

# When the columns come back as a MultiIndex, keep only the first level.
# NOTE(review): presumably level 0 holds the price-field names (Open, High, ...) -- confirm
# against the installed yfinance version.
if isinstance(raw.columns, pd.MultiIndex):
    raw.columns = raw.columns.get_level_values(0)

# Lower-case the column names and keep an independent copy of the OHLCV fields only.
df = raw.rename(columns=str.lower).loc[:, ['open', 'high', 'low', 'close', 'volume']].copy()

# Feature engineering: returns, intrabar spreads, volume change, and rolling
# moving-average ratios / return volatilities. Every feature at a bar is built
# from that bar and earlier bars only (pct_change / rolling look backwards).
df['ret_1'] = df['close'].pct_change(1)
df['ret_3'] = df['close'].pct_change(3)
df['ret_12'] = df['close'].pct_change(12)
df['hl_spread'] = (df['high'] - df['low']) / df['close']
df['oc_spread'] = (df['close'] - df['open']) / df['open']
# Zero-volume bars are treated as missing and forward-filled explicitly, then
# pct_change is told not to fill. This reproduces the old implicit
# fill_method='pad' default, which is deprecated in recent pandas, so the
# feature is identical across pandas versions.
df['vol_chg'] = df['volume'].replace(0, np.nan).ffill().pct_change(fill_method=None)
for w in (6, 12, 24):
    df[f'ma_ratio_{w}'] = df['close'].rolling(w).mean() / df['close'] - 1.0
    df[f'vol_{w}'] = df['ret_1'].rolling(w).std()

# Binary target: next-HORIZON_BARS direction.
# The last HORIZON_BARS rows have no future close. A plain `NaN > close`
# comparison evaluates to False and would silently label them 0 ("down"), so
# the label is masked to NaN there and those rows are removed by dropna() below.
future_close = df['close'].shift(-HORIZON_BARS)
df['target'] = (future_close > df['close']).astype(float).where(future_close.notna())

# Drop warm-up rows (rolling windows / pct_change), infinities, and unlabeled tail rows.
df = df.replace([np.inf, -np.inf], np.nan).dropna().copy()
df['target'] = df['target'].astype(int)  # restore the integer 0/1 label dtype

# Columns fed to the model, in a fixed order.
FEATURE_COLS = [
    'ret_1', 'ret_3', 'ret_12',
    'hl_spread', 'oc_spread', 'vol_chg',
    'ma_ratio_6', 'ma_ratio_12', 'ma_ratio_24',
    'vol_6', 'vol_12', 'vol_24',
]

X = df[FEATURE_COLS].to_numpy(dtype=np.float32)
y = df['target'].to_numpy(dtype=np.float32)

# Chronological 70 / 15 / 15 split (no shuffling).
n = len(df)
i_train = int(n * 0.70)
i_val = int(n * 0.85)

# Purge gap: the label of a row looks HORIZON_BARS rows ahead, so the last
# HORIZON_BARS rows of train (and of val) would be labeled with closes that
# belong to the next split. Dropping them keeps each split's labels inside its
# own time range. The test split still starts at i_val.
# NOTE(review): assumes consecutive rows are consecutive bars; rows removed by
# dropna() can make the gap slightly longer in wall-clock time.
train_end = max(i_train - HORIZON_BARS, 0)
val_end = max(i_val - HORIZON_BARS, i_train)

X_train_raw, X_val_raw, X_test_raw = X[:train_end], X[i_train:val_end], X[i_val:]
y_train, y_val, y_test = y[:train_end], y[i_train:val_end], y[i_val:]
idx_test = df.index[i_val:]

if min(len(X_train_raw), len(X_val_raw), len(X_test_raw)) == 0:
    raise ValueError(
        f'Not enough rows ({n}) for a train/val/test split with a '
        f'{HORIZON_BARS}-bar purge gap. Try a longer period.'
    )

# Fit the scaler on the training rows only, then apply it to all three splits.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw).astype(np.float32)
X_val = scaler.transform(X_val_raw).astype(np.float32)
X_test = scaler.transform(X_test_raw).astype(np.float32)

print({'rows': n, 'train': len(X_train), 'val': len(X_val), 'test': len(X_test), 'pos_rate': float(y.mean())})


In [None]:
import time
from psann import GeoSparseRegressor

# Run on the GPU when one is visible to PyTorch, otherwise on the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# The LSM output width must equal the number of cells in SHAPE.
LSM_OUTPUT_DIM = 64
SHAPE = (8, 8)
assert int(np.prod(SHAPE)) == LSM_OUTPUT_DIM

# Configuration of the LSM expander preprocessor handed to the regressor.
lsm_cfg = dict(
    type='lsmexpander',
    output_dim=LSM_OUTPUT_DIM,
    hidden_layers=2,
    hidden_units=128,
    sparsity=0.8,
    nonlinearity='sine',
    epochs=30,
    batch_size=256,
    lr=1e-3,
    early_stopping=True,
    patience=8,
    random_state=SEED,
    verbose=0,
)

model = GeoSparseRegressor(
    # Network layout.
    shape=SHAPE,
    hidden_layers=6,
    k=16,
    # Mixed activations: 50% psann, 35% relu, 15% tanh, laid out at random.
    activation_type='mixed',
    activation=dict(
        activation_types=['psann', 'relu', 'tanh'],
        activation_ratios=[0.5, 0.35, 0.15],
        layout='random',
        mix_seed=SEED,
    ),
    # LSM preprocessor settings.
    lsm=lsm_cfg,
    lsm_train=True,
    lsm_pretrain_epochs=20,
    lsm_lr=5e-4,
    # Optimisation settings.
    loss='mse',
    epochs=60,
    batch_size=512,
    lr=1e-3,
    optimizer='adam',
    early_stopping=False,
    patience=10,
    random_state=SEED,
    # Mixed precision is enabled exactly when DEVICE is 'cuda'.
    device=DEVICE,
    amp=(DEVICE == 'cuda'),
    amp_dtype='bfloat16',
)

# Time the fit with a monotonic high-resolution clock.
t0 = time.perf_counter()
# Workaround: psann 0.12.3 can double-apply LSM when validation_data is passed.
model.fit(X_train, y_train, verbose=1)
train_time_s = time.perf_counter() - t0
print({'device': DEVICE, 'train_time_s': round(train_time_s, 2)})


In [None]:
from sklearn.metrics import accuracy_score, balanced_accuracy_score, precision_score, recall_score, roc_auc_score

def sigmoid(x):
    """Return the element-wise logistic function of ``x``.

    The input is converted to a float32 array and clipped to [-20, 20]
    before exponentiation so that ``np.exp`` cannot overflow.
    """
    clipped = np.clip(np.asarray(x, dtype=np.float32), -20, 20)
    denominator = 1.0 + np.exp(-clipped)
    return 1.0 / denominator

# Raw model outputs for the validation and test splits, flattened to 1-D.
val_score = np.asarray(model.predict(X_val), dtype=np.float32).reshape(-1)
test_score = np.asarray(model.predict(X_test), dtype=np.float32).reshape(-1)

# The regressor is fit with an MSE loss on 0/1 labels, so its raw output is
# already an estimate of P(up). Passing it through a sigmoid would map 0.5 to
# ~0.62 and miscalibrate both `prob_up` and the threshold grid below; clipping
# to [0, 1] keeps the estimate on the probability scale instead.
val_prob = np.clip(val_score, 0.0, 1.0)
test_prob = np.clip(test_score, 0.0, 1.0)

# Pick the decision threshold on the validation split only (balanced accuracy),
# then apply it unchanged to the test split.
grid = np.linspace(0.35, 0.65, 61)
best_thr = max(grid, key=lambda t: balanced_accuracy_score(y_val, (val_prob >= t).astype(int)))
test_pred = (test_prob >= best_thr).astype(int)

metrics = {
    'threshold': float(best_thr),
    'n_test': int(len(y_test)),
    'pos_rate_test': float(y_test.mean()),
    'acc': float(accuracy_score(y_test, test_pred)),
    'balanced_acc': float(balanced_accuracy_score(y_test, test_pred)),
    'precision': float(precision_score(y_test, test_pred, zero_division=0)),
    'recall': float(recall_score(y_test, test_pred, zero_division=0)),
    # AUC only depends on the ranking, so use the unclipped score to avoid
    # artificial ties at 0 and 1.
    'roc_auc': float(roc_auc_score(y_test, test_score)),
}

display(pd.DataFrame([metrics]).round(4))

# Per-bar test predictions aligned with the test timestamps.
pred_df = pd.DataFrame({
    'close': df.loc[idx_test, 'close'].to_numpy(),
    'target': y_test.astype(int),
    'prob_up': test_prob,
    'pred': test_pred,
}, index=idx_test)
pred_df.tail(10)


In [None]:
import matplotlib.pyplot as plt

# Smooth the per-bar probability with a 24-bar rolling mean and plot it over time.
fig, ax = plt.subplots(figsize=(10, 4))
smoothed_prob = pred_df['prob_up'].rolling(24).mean()
smoothed_prob.plot(ax=ax)
ax.set_title('BTC 5m: rolling mean predicted probability of UP move (24 bars)')
ax.set_ylabel('P(up)')
ax.grid(True, alpha=0.3)
plt.show()
