# Bitcoin Digital Twin (Refactored)

This notebook is a thin wrapper around reusable modules in `src/`.
Run top-to-bottom.


In [None]:
# Make the parent directory importable so `src.*` resolves when the notebook's
# working directory is /notebooks (e.g. in Colab).
import sys
from pathlib import Path
repo_root = Path('..').resolve()
# Append only once so re-running this cell does not grow sys.path.
if sys.path.count(str(repo_root)) == 0:
    sys.path.append(str(repo_root))

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

from src.data import download_data
from src.features import build_features, make_seqs
from src.models import train_return_model, train_direction_model, train_lstm_classifier
from src.evaluate import regression_metrics, classification_metrics, find_best_threshold
from src.strategy import confidence_weighted_positions, equity_curves_from_positions, compare_returns

# The seaborn style sheets were renamed to 'seaborn-v0_8*' in Matplotlib 3.6;
# older releases only know the legacy name 'seaborn'. Use whichever one this
# installation actually registers so the cell runs on both.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

# Run configuration: the tunable values consumed by the cells below.
TICKER = 'BTC-USD'  # symbol passed to download_data
PERIOD = '60d'      # `period` argument of download_data
INTERVAL = '5m'     # `interval` argument of download_data
LOOKBACK = 60       # `lookback` for make_seqs and train_lstm_classifier
HORIZON = 3         # `horizon` argument of build_features


In [None]:
# Call download_data with the ticker, period and interval from the settings cell.
df_raw = download_data(
    TICKER,
    period=PERIOD,
    interval=INTERVAL,
)
print('Raw data shape:', df_raw.shape)
# Trailing expression: the notebook renders the first rows as the cell output.
df_raw.head()


In [None]:
# build_features returns four objects: the feature matrix `x`, the target used by
# the return model (`y_reg`), the target used by the direction models
# (`y_class`), and the frame `df_feat` that the backtest later reads 'Close' from.
(x, y_reg, y_class, df_feat) = build_features(
    df_raw,
    horizon=HORIZON,
)
# Trailing expression: preview the first rows of df_feat.
df_feat.head()


In [None]:
# Chronological split: the first 70% of rows train, the remaining rows test.
# Nothing is shuffled, so the test rows all come after the training rows.
n = len(df_feat)
train_size = int(n * 0.7)

x_train, x_test = x[:train_size], x[train_size:]
y_return_train, y_return_test = y_reg[:train_size], y_reg[train_size:]
y_direction_train, y_direction_test = y_class[:train_size], y_class[train_size:]

# Fit the scaler on the training rows only, then apply it to both partitions.
scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)
print('Train:', x_train.shape, '| Test:', x_test.shape)


In [None]:
# Baseline 1: return model, trained on the scaled training features and scored
# on the test partition.
return_model = train_return_model(x_train_scaled, y_return_train)
y_return_pred = return_model.predict(x_test_scaled)
return_scores = regression_metrics(y_return_test, y_return_pred)
print('Linear Regression:', return_scores)

# Baseline 2: direction model, same features, class target.
direction_model = train_direction_model(x_train_scaled, y_direction_train)
y_direction_pred = direction_model.predict(x_test_scaled)
direction_scores = classification_metrics(y_direction_test, y_direction_pred)
print('Logistic Regression:', direction_scores)


In [None]:
# Sequence prep for the LSTM: scale the full feature matrix with the existing
# `scaler`, then let make_seqs build (sequence, label) pairs from it.
x_all_scaled = scaler.transform(x)
x_seq, y_seq = make_seqs(x_all_scaled, y_class, lookback=LOOKBACK)

# Chronological 70/30 split over the sequences (no shuffling).
n_seq = len(x_seq)
train_size_seq = int(n_seq * 0.7)

x_seq_train, x_seq_test = x_seq[:train_size_seq], x_seq[train_size_seq:]
y_seq_train, y_seq_test = y_seq[:train_size_seq], y_seq[train_size_seq:]

print('Train sequences:', x_seq_train.shape, '| Test sequences:', x_seq_test.shape)


In [None]:
# Train the LSTM classifier on the training sequences.
# The feature count is taken from axis 2 of the training sequence array.
n_seq_features = x_seq_train.shape[2]
model, history, class_weights = train_lstm_classifier(
    x_seq_train,
    y_seq_train,
    lookback=LOOKBACK,
    n_features=n_seq_features,
    epochs=50,
    batch_size=64,
    patience=6,
)
print('Class weights:', class_weights)


In [None]:
# Predict probabilities for the test sequences and choose a decision threshold.
# CAVEAT: the threshold is selected on the same test set it is then reported on,
# so these metrics (and the backtest that reuses best_t) are optimistic. A real
# setup should tune the threshold on a separate validation split.
y_prob = model.predict(x_seq_test)
y_prob = y_prob.reshape(-1)  # flatten to one value per test sequence
best_t, best_metrics = find_best_threshold(y_seq_test, y_prob)
print('Best threshold:', best_t)
print('Metrics at best threshold:', best_metrics)


In [None]:
# Backtest: pick the 'Close' prices that line up with the test-set predictions.
# NOTE(review): the offset assumes make_seqs emits its first sequence for row
# LOOKBACK of the feature matrix — confirm against src.features.
seq_start_index = LOOKBACK + train_size_seq
seq_end_index = seq_start_index + len(y_prob)
prices = df_feat['Close'].iloc[seq_start_index:seq_end_index]

positions = confidence_weighted_positions(y_prob, threshold=best_t)
strategy_eq, baseline_eq, strategy_ret, baseline_ret = equity_curves_from_positions(prices, positions)

# Plot both equity curves on one explicit Axes.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(strategy_eq, label='LSTM Strategy (confidence-weighted)')
ax.plot(baseline_eq, label='Buy & Hold')
ax.set_title(f'Cumulative Returns: Strategy vs Buy & Hold (threshold={best_t:.2f})')
ax.set_ylabel('Growth of $1')
ax.set_xlabel('Time')
ax.grid(True)
ax.legend()
plt.show()

print(compare_returns(strategy_ret, baseline_ret))
