Testing Notebook

This notebook imports and runs the project's scripts so that all of their outputs are kept together in this one file.

0. Initial import of modules

In [1]:
import os
import torch
# Show the working directory. The project-local imports used later
# (preprocessing, datasets, models, train) and the relative paths such as
# ./data/... are presumably resolved against it — TODO confirm.
# NOTE(review): the printed path is machine-specific; clear this output before sharing.
print(os.getcwd())
# Seed torch's global RNG so shuffling and weight initialisation are repeatable.
torch.manual_seed(42)

c:\Users\benjo\Documents\Projects\crypto-lstm-trader


<torch._C.Generator at 0x2987eb9f2d0>

In [None]:
from preprocessing.fetch_binance import fetch_binance
from preprocessing.prep_data import feature_creation, z_score_norm, window_creation, train_val_test_split, make_label_k_epsilon

1. Obtaining the data

In [3]:
# Load hourly BTC/USDT candles through the project's fetch_binance helper.
# NOTE(review): cache_path / max_age_hrs suggest the CSV is reused unless it is
# older than 3 hours — confirm against preprocessing.fetch_binance.
df = fetch_binance(
    exchange_ticker='BTC/USDT',
    start_date='2017-01-01T00:00:00Z',
    timeframe='1h',
    cache_path=('./data/usd_btc_binance.csv'),
    max_age_hrs=3
)
# Preview the first rows (timestamp index; open/high/low/close/volume columns).
df.head()

Loading prexisting data from ./data/usd_btc_binance.csv


Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-08-17 04:00:00,4261.48,4313.62,4261.32,4308.83,47.181009
2017-08-17 05:00:00,4308.83,4328.69,4291.37,4315.32,23.234916
2017-08-17 06:00:00,4330.29,4345.45,4309.37,4324.35,7.229691
2017-08-17 07:00:00,4316.62,4349.99,4287.41,4349.99,4.443249
2017-08-17 08:00:00,4333.32,4377.85,4333.32,4360.69,0.972807


2. Normalisation and Window Creation

In [None]:
import numpy as np
# Earlier labelling experiment, kept for reference only:
#df = target_def_knext(df,4)
# Pipeline: label -> features -> z-score normalisation -> sliding windows.
# NOTE(review): the arguments are positional; judging by the keyword call in the
# label-sweep cell they are presumably (k=12, eps_quantile=0.7, train_frac=0.7,
# use_log_returns=True) — confirm against make_label_k_epsilon's signature.
# NOTE(review): here labels are created BEFORE feature_creation, whereas the
# label-sweep cell runs feature_creation first — confirm both orders are equivalent.
labeled_df = make_label_k_epsilon(df,12,0.7,0.7,True)
mod_df = feature_creation(labeled_df)

# train_frac=0.7 matches the fraction used for labelling above.
norm_df = z_score_norm(mod_df, train_frac= 0.7)

# 48 rows per window; with the 1h timeframe requested above that is 48 hours.
x, y = window_creation(norm_df, window_size=48)

# Displayed as the cell output: (windows, window_size, features) and (windows,).
x.shape, y.shape


(torch.Size([24796, 48, 41]), torch.Size([24796]))

3. Splitting data into test, validation and training sets

In [5]:
# Split the windows and labels into train / validation / test sets.
# NOTE(review): split fractions and ordering are decided inside train_val_test_split (not visible here).
x_train, y_train, x_val, y_val,x_test, y_test = train_val_test_split(x, y)
# map() objects are one-shot iterators: they are consumed by the list() calls below.
x_lengths = map(len,[x_train,x_val,x_test])
y_lengths = map(len,[y_train,y_val,y_test])
# The two printed lists should match element-wise (one label per window).
print(list(x_lengths))
print(list(y_lengths)) 


[17357, 3719, 3720]
[17357, 3719, 3720]


4. Wrap the data in custom datasets so torch dataloaders can be used

In [6]:
from torch.utils.data import DataLoader
from datasets.sequence_dataset import SequenceDataset

# Wrap each split in a SequenceDataset and serve it in mini-batches of 64.
batch_size= 64
# Only the training loader shuffles; validation and test keep their original order.
train_loader= DataLoader(SequenceDataset(x_train,y_train),batch_size=batch_size, shuffle=True)
val_loader = DataLoader(SequenceDataset(x_val,y_val), batch_size=batch_size, shuffle=False)
test_loader = DataLoader(SequenceDataset(x_test,y_test),batch_size=batch_size, shuffle=False)


In [7]:
def inspect_split(loader):
    """Print and return the fraction of positive labels served by a loader.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` pairs. Only ``labels`` is
            read, through ``numel()`` and ``sum().item()``, so it is expected to
            be a tensor whose sum counts the positives (e.g. 0/1 labels).

    Returns:
        float: ``positives / samples``, or ``nan`` when the loader yields no
        samples (previously this case raised ``ZeroDivisionError``).
    """
    n = 0
    pos = 0
    for _, labels in loader:
        n += labels.numel()
        pos += labels.sum().item()
    # Guard the empty-loader case instead of dividing by zero.
    p = pos / n if n else float("nan")
    print(f"Samples={n}, Positives={pos} ({p:.3f})")
    return p

# Report the positive-label rate of the training and validation loaders.
print("Train split:")
p_train = inspect_split(train_loader)
print("Val split:")
p_val = inspect_split(val_loader)


Train split:
Samples=17357, Positives=9179.0 (0.529)
Val split:
Samples=3719, Positives=1991.0 (0.535)


5. Model set up and Training

In [None]:
from models.lstm import LSTMClassifier
from train.train_test_model import fit , positive_weight, select_threshold_constrained, threshold_free_metrics

# Number of features per time step = size of the last dimension of the windows.
input_size = x_train.size(-1)
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LSTMClassifier(input_size=input_size,hidden_size=128,num_layers= 2)
# NOTE(review): pos_weight is computed here but fit() below is called with
# pos_weight=None, so this value is currently unused — confirm that is intended.
pos_weight = positive_weight(y_train,device)
# fit returns the model, a threshold (t_star, reused by the test-evaluation cell)
# and a history object; it is given models/best.pt as its save path.
# NOTE(review): the model is not moved to `device` in this cell; presumably fit() does that — confirm.
model,t_star,history = fit(
        model,
        train_loader=train_loader,
        val_loader=val_loader,
        epochs=40,
        lr=1e-3,              
        device=device,
        save_path="models/best.pt",
        patience= 7,            # optional early stopping
        pos_weight= None
    )

Training results of batch 1Loss : 0.690578, Accuracy : 0.578125Pre-Clip param norm: 0.089817
Training results of batch 50Loss : 0.691178, Accuracy : 0.484375Pre-Clip param norm: 0.059270
Training results of batch 100Loss : 0.695871, Accuracy : 0.406250Pre-Clip param norm: 0.063831
Training results of batch 150Loss : 0.702213, Accuracy : 0.500000Pre-Clip param norm: 0.098106
Training results of batch 200Loss : 0.674775, Accuracy : 0.578125Pre-Clip param norm: 0.079176
Training results of batch 250Loss : 0.689940, Accuracy : 0.500000Pre-Clip param norm: 0.111646
Epoch num: 1Train: loss 0.6871, acc 0.5438Val: loss 0.6990, acc 0.5284,ROC-AUC 0.521,PR-AUC 0.556Best Thr(F1) 0.564, F1 -1.000, P 0.000, R 0.000, PosRate 0.535
New Model saved: PR-AUC = 0.5563 (t*=0.564)
Training results of batch 1Loss : 0.707619, Accuracy : 0.500000Pre-Clip param norm: 0.191558
Training results of batch 50Loss : 0.694645, Accuracy : 0.500000Pre-Clip param norm: 0.232904
Training results of batch 100Loss : 0.6963

Beginning of hyperparameter tuning - testing whether the model can learn at all, and whether changes are needed elsewhere instead (data/features/splitting/optimization)

In [None]:
def tiny_overfit(model, train_ds, steps=500, k=64, lr=3e-3):
    """Sanity check: try to memorise a handful of randomly chosen samples.

    Draws up to ``k`` random samples from ``train_ds``, then repeatedly fits
    ``model`` on that single fixed batch with AdamW and BCE-with-logits loss,
    printing loss and accuracy every 50 steps. A model that can learn should
    reach ~0 loss / 1.0 accuracy on such a tiny batch.

    Args:
        model: Module whose output can be squeezed on the last dimension to one
            logit per sample. It is trained IN PLACE and left in train mode.
        train_ds: Indexable dataset; ``train_ds[i]`` returns ``(X, y)`` where
            ``X`` is a tensor and ``y`` is a scalar label.
        steps: Number of optimiser updates (not epochs).
        k: Maximum number of samples to memorise.
        lr: AdamW learning rate.

    Returns:
        None. Progress is reported via ``print``.

    Raises:
        ValueError: If no samples are selected (empty dataset or ``k == 0``).
    """
    from torch.optim import AdamW
    from torch.nn.utils import clip_grad_norm_
    import torch

    # Run on the device the model already lives on instead of relying on a
    # notebook-global `device` variable being defined by an earlier cell.
    model_device = next(model.parameters()).device

    # Plain ints index any dataset type, not only tensor-backed ones.
    picked = torch.randperm(len(train_ds))[:k].tolist()
    if not picked:
        raise ValueError("tiny_overfit needs at least one sample (empty dataset or k == 0)")
    samples = [train_ds[i] for i in picked]

    # Build the fixed batch once and move it to the device once (not every step).
    X_small = torch.stack([X for X, _ in samples]).to(model_device)  # [k, T, F]
    y_small = torch.tensor(
        [float(label) for _, label in samples], dtype=torch.float32
    ).to(model_device)  # [k]

    model.train()
    opt = AdamW(model.parameters(), lr=lr)
    crit = torch.nn.BCEWithLogitsLoss()
    for t in range(steps):
        opt.zero_grad(set_to_none=True)
        logits = model(X_small).squeeze(-1)
        loss = crit(logits, y_small)
        loss.backward()
        clip_grad_norm_(model.parameters(), 1.0)
        opt.step()
        if (t+1) % 50 == 0:
            # Accuracy is measured on the logits from before this step's update.
            with torch.no_grad():
                preds = (logits.sigmoid() > 0.5).float()
                acc = (preds == y_small).float().mean().item()
            print(f'{t+1:04d} | loss {loss.item():.4f} | acc {acc:.3f}')

# NOTE(review): this passes the already-trained `model`, so its weights are
# overwritten by the overfitting run; the test-evaluation cell reloads
# models/best.pt before using the model again.
tiny_overfit(
    model=model,
    train_ds=SequenceDataset(x_train,y_train),
    steps=500,   # updates, not epochs
    k=64,        # number of samples to memorize
    lr=3e-3      # slightly higher LR for speed
)

# Result: accuracy stays at 1.000 and the loss falls to ~0 on this tiny batch,
# so the model can definitely learn (it is able to memorise the samples).

0050 | loss 0.0006 | acc 1.000
0100 | loss 0.0001 | acc 1.000
0150 | loss 0.0001 | acc 1.000
0200 | loss 0.0000 | acc 1.000
0250 | loss 0.0000 | acc 1.000
0300 | loss 0.0000 | acc 1.000
0350 | loss 0.0000 | acc 1.000
0400 | loss 0.0000 | acc 1.000
0450 | loss 0.0000 | acc 1.000
0500 | loss 0.0000 | acc 1.000


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    average_precision_score,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
)

# Baseline: fit a logistic-regression classifier on the last time step of each window, for comparison with the LSTM

def make_tabular_last(norm_df, window_size=48):
    """Build a tabular dataset with one row per window: the window's last row.

    Window ``i`` covers rows ``i .. i + window_size - 1``, so its last row is
    ``i + window_size - 1``. Taking every window's last row is therefore the
    same as dropping the first ``window_size - 1`` rows, which is done here with
    a single slice instead of a per-row ``.iloc`` loop.

    Args:
        norm_df: DataFrame with a ``'label'`` column; every other column is
            used as a feature.
        window_size: Number of rows per window (must be >= 1).

    Returns:
        tuple: ``(X, y)`` where ``X`` is a 2-D array of feature rows (a copy,
        independent of ``norm_df``) and ``y`` the matching integer labels.
        Both are empty when ``norm_df`` has fewer than ``window_size`` rows.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")
    feats = norm_df.drop(columns=['label'])
    labels = norm_df['label'].to_numpy().astype(int)
    first_end = window_size - 1  # index of the last row of the first window
    # .copy() keeps X independent of norm_df's underlying buffer.
    X = feats.to_numpy()[first_end:].copy()
    y = labels[first_end:]
    return X, y

def chrono_split(X, y, train_frac=0.7, val_frac=0.15):
    """Cut aligned sequences into train / validation / test by position.

    No shuffling is done: the first ``train_frac`` of the items form the
    training part, the next ``val_frac`` the validation part, and whatever
    remains the test part.

    Args:
        X: Sliceable sequence of samples.
        y: Sliceable sequence of labels; its length sets the cut points.
        train_frac: Fraction of items assigned to training.
        val_frac: Fraction of items assigned to validation.

    Returns:
        tuple: ``((X_train, y_train), (X_val, y_val), (X_test, y_test))``.
    """
    total = len(y)
    train_end = int(total * train_frac)
    val_end = int(total * (train_frac + val_frac))

    train_part = (X[:train_end], y[:train_end])
    val_part = (X[train_end:val_end], y[train_end:val_end])
    test_part = (X[val_end:], y[val_end:])
    return train_part, val_part, test_part

def eval_with_constraints(probs, y, min_pos_rate=0.05, max_pos_rate=0.95, min_precision=None):
    """Score probabilities with threshold-free metrics plus a constrained threshold.

    ROC-AUC and PR-AUC come from ``threshold_free_metrics``; the decision
    threshold comes from ``select_threshold_constrained``. A negative ``"f1"``
    in that helper's result is treated as "no threshold found", in which case
    every threshold-dependent entry of the report is ``None``.

    Args:
        probs: Predicted probabilities for the positive class.
        y: True binary labels aligned with ``probs``.
        min_pos_rate: Forwarded to ``select_threshold_constrained``.
        max_pos_rate: Forwarded to ``select_threshold_constrained``.
        min_precision: Forwarded to ``select_threshold_constrained``.

    Returns:
        dict: Keys ``roc``, ``pr``, ``t``, ``f1``, ``prec``, ``rec``, ``pos_rate``.
    """
    roc, pr = threshold_free_metrics(probs, y)
    chosen = select_threshold_constrained(probs, y, min_pos_rate, max_pos_rate, min_precision)

    # Start from the "nothing selected" report and fill it in on success.
    report = {"roc": roc, "pr": pr, "t": None, "f1": None, "prec": None, "rec": None, "pos_rate": None}
    if chosen["f1"] < 0:
        return report

    predicted = probs >= chosen["t"]
    report["t"] = chosen["t"]
    report["f1"] = chosen["f1"]
    report["prec"] = precision_score(y, predicted, zero_division=0)
    report["rec"] = recall_score(y, predicted, zero_division=0)
    report["pos_rate"] = float(predicted.mean())
    return report

def logistic_baseline(norm_df, window_size=48, C=1.0, max_iter=2000):
    """Fit a logistic-regression baseline and report validation / test metrics.

    Features are the last row of every window (``make_tabular_last``), split
    chronologically 70 / 15 / 15 (``chrono_split``). The decision threshold is
    chosen on the validation split and reused unchanged on the test split.

    Args:
        norm_df: Normalised DataFrame containing a ``'label'`` column.
        window_size: Window length passed to ``make_tabular_last``.
        C: Inverse regularisation strength for ``LogisticRegression``.
        max_iter: Iteration cap for the lbfgs solver.

    Returns:
        tuple: ``(val_metrics, test_metrics)`` dictionaries. Threshold-dependent
        test entries are ``None`` when no validation threshold was selected.
    """
    X, y = make_tabular_last(norm_df, window_size)
    (Xtr, ytr), (Xv, yv), (Xte, yte) = chrono_split(X, y, 0.7, 0.15)

    # Re-weight the classes only when the training labels are clearly imbalanced.
    train_pos_frac = ytr.mean()
    imbalanced = train_pos_frac < 0.35 or train_pos_frac > 0.65
    cls_wt = "balanced" if imbalanced else None

    clf = LogisticRegression(C=C, max_iter=max_iter, solver="lbfgs", class_weight=cls_wt)
    clf.fit(Xtr, ytr)

    # Validation: require a precision of at least 0.55, or the positive rate if higher.
    val_probs = clf.predict_proba(Xv)[:, 1]
    min_prec = max(0.55, float(yv.mean()))
    val_metrics = eval_with_constraints(val_probs, yv, 0.05, 0.95, min_prec)

    # Test: threshold-free metrics always; thresholded ones only if a threshold exists.
    test_probs = clf.predict_proba(Xte)[:, 1]
    threshold = val_metrics["t"]
    test_metrics = {
        "roc": roc_auc_score(yte, test_probs),
        "pr": average_precision_score(yte, test_probs),
        "f1": None,
        "prec": None,
        "rec": None,
        "pos_rate": None,
        "t": threshold,
    }
    if threshold is not None:
        test_pred = test_probs >= threshold
        test_metrics["f1"] = f1_score(yte, test_pred, zero_division=0)
        test_metrics["prec"] = precision_score(yte, test_pred, zero_division=0)
        test_metrics["rec"] = recall_score(yte, test_pred, zero_division=0)
        test_metrics["pos_rate"] = float(test_pred.mean())
    return val_metrics, test_metrics

# Run the baseline on the same normalised frame and window size used for the LSTM,
# so its metrics can be compared with the LSTM's.
val_m, test_m = logistic_baseline(norm_df, window_size=48)
print("LogReg VAL:", val_m)
print("LogReg TEST:", test_m)


LogReg VAL: {'roc': 0.5608382604492042, 'pr': 0.5825557162074742, 't': 0.4599999999999999, 'f1': 0.6708587148752211, 'prec': 0.5511785598966742, 'rec': 0.8569277108433735, 'pos_rate': 0.8325268817204301}
LogReg TEST: {'roc': 0.6011337047410288, 'pr': 0.6291387219592178, 'f1': 0.6142034548944337, 'prec': 0.5799728137743543, 'rec': 0.6527281998980112, 'pos_rate': 0.5932795698924731, 't': 0.4599999999999999}


In [None]:
# Build the features once; only the labelling changes inside the sweep.
# NOTE(review): features are created BEFORE labelling here, whereas the
# normalisation cell labels first — confirm both orders are equivalent.
feat_df = feature_creation(df) 

# Try several label definitions: horizon k in {6, 8, 12} and epsilon quantile of 0.60 or 0.70.
tests = []
for k in (6, 8, 12):
    for q in (0.60, 0.70):
        labeled = make_label_k_epsilon(feat_df, k=k, eps_quantile=q, train_frac=0.7, use_log_returns=True)
        norm = z_score_norm(labeled, train_frac=0.7)

        # Score each label definition with the logistic baseline for comparison.
        # NOTE: this overwrites the global val_m / test_m printed by the baseline cell.
        val_m, test_m = logistic_baseline(norm, window_size=48)  
        tests.append((k, q, val_m["pr"], val_m["roc"], test_m["pr"], test_m["roc"]))


# One row per (k, eps_q) combination.
print("k  eps_q   VAL_PR   VAL_ROC   TEST_PR  TEST_ROC")
for k, q, vpr, vroc, tpr, troc in tests:
    print(f"{k:<2} {q:<5}  {vpr:.3f}    {vroc:.3f}     {tpr:.3f}    {troc:.3f}")


# Select on validation PR-AUC (tuple index 2); test metrics are shown but not used for selection.
best = max(tests, key=lambda x: x[2])
print("\nBEST (by VAL_PR): k=%d, eps_q=%.2f  VAL_PR=%.3f  VAL_ROC=%.3f" % (best[0], best[1], best[2], best[3]))

k  eps_q   VAL_PR   VAL_ROC   TEST_PR  TEST_ROC
6  0.6    0.560    0.548     0.572    0.545
6  0.7    0.558    0.571     0.582    0.565
8  0.6    0.568    0.554     0.585    0.551
8  0.7    0.575    0.567     0.594    0.565
12 0.6    0.588    0.574     0.603    0.568
12 0.7    0.578    0.580     0.648    0.594

BEST (by VAL_PR): k=12, eps_q=0.60  VAL_PR=0.588  VAL_ROC=0.574


6. Testing the model on test data set and comparing outputs

In [None]:
from train.train_test_model import evaluate_test 

# Restore the weights stored at models/best.pt (the save_path given to fit()).
# This also discards the in-place changes made to `model` by the tiny_overfit cell.
# NOTE(review): torch.load unpickles the file; consider weights_only=True if the
# installed torch version supports it — confirm.
model.load_state_dict(torch.load("models/best.pt", map_location=device))
model.to(device)

# Evaluate on the held-out test loader using t_star, the threshold returned by fit().
test_report, test_probs, test_y = evaluate_test(model, test_loader, device, t_star)
print("TEST REPORT:", test_report)

# (optional) confusion matrix counts
# NOTE(review): assumes test_probs and test_y are array-like objects supporting
# element-wise comparison, `&` and .sum() — confirm evaluate_test's return types.
yhat = (test_probs >= test_report["t"])
tn = int(((yhat == 0) & (test_y == 0)).sum())
tp = int(((yhat == 1) & (test_y == 1)).sum())
fn = int(((yhat == 0) & (test_y == 1)).sum())
fp = int(((yhat == 1) & (test_y == 0)).sum())
# NOTE(review): the saved output of this cell shows keys 'TP'/'FP'/'TN'/'FN', so it
# predates the key names printed here; re-run the cell to refresh it.
print({"True Pos": tp, "False Pos": fp, "True Neg": tn, "False Neg": fn})


TEST REPORT: {'roc': 0.6053970258576201, 'pr': 0.6294132388074424, 'f1': 0.40837336993822926, 'prec': 0.625, 'rec': 0.30326197757390416, 'pos_rate': 0.25591397849462366, 't': 0.5399999999999999, 'prevalence': 0.5274193548387097, 'n': 3720}
{'TP': 595, 'FP': 357, 'TN': 1401, 'FN': 1367}
