In [None]:
from BounceCheckStrategy import BounceTrackStrategy
from backtest import backtest_market

In [None]:
import numpy as np
import pandas as pd
import json
import os
import datetime as dt
from tqdm import tqdm
from collections import defaultdict

### Some additional (unnecessary for this specific example) data loading and cleaning logic

In [3]:
path = "btc_contracts"
btc_contract_data = os.listdir(path)
btc_contract_slugs = [filename.split(".")[0] for filename in btc_contract_data]

FileNotFoundError: [Errno 2] No such file or directory: 'btc_contracts'

In [4]:
path = "Chainlink_prices"
chainlink_data = os.listdir(path)
chainlink_data = sorted(chainlink_data)

In [10]:
# clear out the entries that use symbol_timestamp instead of timestamp
chainlink_data = sorted(chainlink_data)[2:]
chainlink_data

['chainlink_crypto_prices_2025-10-31.jsonl',
 'chainlink_crypto_prices_2025-11-01.jsonl',
 'chainlink_crypto_prices_2025-11-02.jsonl',
 'chainlink_crypto_prices_2025-11-03.jsonl',
 'chainlink_crypto_prices_2025-11-04.jsonl',
 'chainlink_crypto_prices_2025-11-05.jsonl']

In [27]:
def parse_jsonl_prices(file_path):
    json_df = pd.read_json(path_or_buf=file_path, lines=True, convert_dates=False)
    json_df = json_df.rename(columns={"timestamp":"delivery_timestamp"})
    expanded_payload = pd.json_normalize(json_df['payload'])
    expanded_payload = expanded_payload.rename(columns={'timestamp': "value_timestamp"})
    crypto_df = pd.concat([json_df.drop(['payload', 'symbol', 'symbol_timestamp'], axis=1, errors="ignore"), expanded_payload], axis=1)

    return crypto_df

In [28]:
file_path = f"{path}/{chainlink_data[0]}"
crypto_df = parse_jsonl_prices(file_path)
crypto_df.head(1)

Unnamed: 0,connection_id,delivery_timestamp,topic,type,full_accuracy_value,symbol,value_timestamp,value
0,TSe1ndKFrPECEaw=,1761875106404,crypto_prices_chainlink,update,3838450000000000000000,eth/usd,1761875105000,3838.45


In [29]:
btc_df = crypto_df[crypto_df['symbol'] == 'btc/usd']

In [25]:
# for every market, check if the start and end times are in the crypto price data
# if not, we need to note that and skip

btc_df['value_timestamp'].isin([1761875105000, 1761875106000]).sum()

np.int64(2)

In [41]:
# dt_object_local = dt.datetime.fromtimestamp(int(start_time/1000))
# dt_object_local.date() == 

datetime.date(2025, 11, 5)

In [43]:
# {dt_object_local.date():2}

{datetime.date(2025, 11, 5): 2}

### 1 Market Testing for up down

In [5]:
## backtest inputs
crypto_data_filepath = "btc_and_price_data/"
crypto_data_filename = "chainlink_crypto_prices_2025-10-30.jsonl"
market_data_filepath = "btc_and_price_data/"
crypto_symbol = "btc/usd"
market_slug = "btc-updown-15m-1761786000"
unix_start_time = 1_761_786_000_000
unix_end_time = unix_start_time + 900_000 # buffer
warm_up_duration = 900_000 # 5 minutes in milliseconds?
time_precision = 13 # in millisecond format (10 = second format)

bounce_pairs = []
# (start, up, down) 0.005 and 0.995 are basically 1 (no bids/asks)
space = np.linspace(0,1,101)
space[0] += 0.005
space[-1] -= 0.005
for i, start in enumerate(space):
    for up in space[i+1:]:
        for down in space[:i]:
            bounce_pairs.append((start, up, down))

strategy = BounceTrackStrategy(bounce_pairs, unix_start_time, unix_end_time, time_precision=13, effective_memory=300)

warm_up_data, backtest_data = backtest_market(crypto_data_filepath, crypto_data_filename, crypto_symbol,
                                market_data_filepath, market_slug,
                                unix_start_time, unix_end_time,
                                warm_up_duration,
                                strategy,
                                time_precision=13 
                            )



In [6]:
backtest_df = pd.DataFrame(backtest_data)
backtest_df.head()

Unnamed: 0,timestamp,crypto_price,vol,market_spread,best_bid,best_ask,theo_price,mid_price,lwm_price,fraction_pairs_resolved,fraction_pairs_open,resolved_pairs,unresolved_pairs
0,1761786000000,110561.011739,0.171043,,0.0,1.0,,,,0.0,1.0,[],"[0.100 up to 0.200 or down to 0.005, 0.100 up ..."
1,1761786001000,110561.03587,0.170797,,0.0,1.0,0.447996,,,0.0,1.0,[],"[0.100 up to 0.200 or down to 0.005, 0.100 up ..."
2,1761786002000,110563.051548,0.170512,,0.0,1.0,0.455873,,,0.0,1.0,[],"[0.100 up to 0.200 or down to 0.005, 0.100 up ..."
3,1761786003000,110562.101548,0.17033,,0.0,1.0,0.452111,,,0.0,1.0,[],"[0.100 up to 0.200 or down to 0.005, 0.100 up ..."
4,1761786004000,110559.879228,0.170069,,0.0,1.0,0.443299,,,0.0,1.0,[],"[0.100 up to 0.200 or down to 0.005, 0.100 up ..."


In [7]:
backtest_df = backtest_df[(backtest_df['timestamp']>=unix_start_time) & (backtest_df['timestamp']<=unix_end_time)]

In [17]:
backtest_df.iloc[-1]

timestamp                                                      1761786900000
crypto_price                                                   110512.040682
vol                                                                 0.282663
market_spread                                                           0.05
best_bid                                                                0.01
best_ask                                                                0.06
theo_price                                                               0.0
mid_price                                                              0.035
lwm_price                                                           0.059431
fraction_pairs_resolved                                             0.721212
fraction_pairs_open                                                 0.278788
resolved_pairs             [0.500 up to 0.600 or down to 0.400, 0.500 up ...
unresolved_pairs           [0.100 up to 0.800 or down to 0.005, 0.100 up ...

In [8]:
bounce_pairs = backtest_df.iloc[-1].loc["resolved_pairs"]
active_pairs = backtest_df.iloc[-1].loc["unresolved_pairs"]

In [21]:
for bounce_pair in bounce_pairs:
    print(f"Start: {bounce_pair.start_value:.3f} | UP/DOWN {bounce_pair.up_value:.3f}/{bounce_pair.down_value:.3f} | Side Hit: {bounce_pair.side_hit_first}")

Start: 0.500 | UP/DOWN 0.600/0.400 | Side Hit: -1
Start: 0.500 | UP/DOWN 0.700/0.400 | Side Hit: -1
Start: 0.500 | UP/DOWN 0.800/0.400 | Side Hit: -1
Start: 0.500 | UP/DOWN 0.900/0.400 | Side Hit: -1
Start: 0.500 | UP/DOWN 0.995/0.400 | Side Hit: -1
Start: 0.400 | UP/DOWN 0.500/0.300 | Side Hit: -1
Start: 0.400 | UP/DOWN 0.600/0.300 | Side Hit: -1
Start: 0.400 | UP/DOWN 0.700/0.300 | Side Hit: -1
Start: 0.400 | UP/DOWN 0.800/0.300 | Side Hit: -1
Start: 0.400 | UP/DOWN 0.900/0.300 | Side Hit: -1
Start: 0.400 | UP/DOWN 0.995/0.300 | Side Hit: -1
Start: 0.500 | UP/DOWN 0.600/0.300 | Side Hit: -1
Start: 0.500 | UP/DOWN 0.700/0.300 | Side Hit: -1
Start: 0.500 | UP/DOWN 0.800/0.300 | Side Hit: -1
Start: 0.500 | UP/DOWN 0.900/0.300 | Side Hit: -1
Start: 0.500 | UP/DOWN 0.995/0.300 | Side Hit: -1
Start: 0.300 | UP/DOWN 0.400/0.200 | Side Hit: -1
Start: 0.300 | UP/DOWN 0.500/0.200 | Side Hit: -1
Start: 0.300 | UP/DOWN 0.600/0.200 | Side Hit: -1
Start: 0.300 | UP/DOWN 0.700/0.200 | Side Hit: -1


### Performing the Backtest on many markets to get data

In [None]:
path = "btc_contracts-2"
contract_data_files = os.listdir(path)

In [None]:
sorted(contract_data_files)

['btc-updown-15m-1762020900.jsonl',
 'btc-updown-15m-1762021800.jsonl',
 'btc-updown-15m-1762022700.jsonl',
 'btc-updown-15m-1762023600.jsonl',
 'btc-updown-15m-1762024500.jsonl',
 'btc-updown-15m-1762025400.jsonl',
 'btc-updown-15m-1762026300.jsonl',
 'btc-updown-15m-1762027200.jsonl',
 'btc-updown-15m-1762028100.jsonl',
 'btc-updown-15m-1762029000.jsonl',
 'btc-updown-15m-1762029900.jsonl',
 'btc-updown-15m-1762030800.jsonl',
 'btc-updown-15m-1762031700.jsonl',
 'btc-updown-15m-1762032600.jsonl',
 'btc-updown-15m-1762033500.jsonl',
 'btc-updown-15m-1762034400.jsonl',
 'btc-updown-15m-1762035300.jsonl',
 'btc-updown-15m-1762036200.jsonl',
 'btc-updown-15m-1762037100.jsonl',
 'btc-updown-15m-1762038000.jsonl',
 'btc-updown-15m-1762038900.jsonl',
 'btc-updown-15m-1762039800.jsonl',
 'btc-updown-15m-1762040700.jsonl',
 'btc-updown-15m-1762041600.jsonl',
 'btc-updown-15m-1762042500.jsonl',
 'btc-updown-15m-1762043400.jsonl',
 'btc-updown-15m-1762044300.jsonl',
 'btc-updown-15m-1762045200.

In [None]:
path = "btc_contracts-2"
contract_data_files = os.listdir(path)
contract_slugs = [filename.split(".")[0] for filename in contract_data_files]

up_down_market_data = {}
for market_slug in tqdm(contract_slugs):
    
    # Crypto data doesn't matter for this strategy, so we'll just leave it
    crypto_data_filepath = "Chainlink_prices/"
    crypto_data_filename = "chainlink_crypto_prices_2025-10-30.jsonl"

    market_data_filepath = f"{path}/"
    crypto_symbol = "btc/usd"
    # market_slug = "btc-updown-15m-1761786000"

    # millisecond unix precision
    start_time_s = int(market_slug.split("-")[-1])
    start_time_ms = start_time_s*1000
    end_time_ms = (start_time_s + 900)*1000

    warm_up_duration = 0 # 5 minutes in milliseconds?
    time_precision = 13 # in millisecond format (10 = second format)

    bounce_pairs = []
    # (start, up, down) 0.005 and 0.995 are basically 1 (no bids/asks)
    space = np.array([0.0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.0])
    space[0] += 0.005
    space[-1] -= 0.005
    for i, start in enumerate(space):
        for up in space[i+1:]:
            for down in space[:i]:
                bounce_pairs.append((start, up, down))
    
    strategy = BounceTrackStrategy(bounce_pairs, start_time_ms, end_time_ms, time_precision=13, effective_memory=300)
    
    print(f"\n\nSTARTING WORK FOR MARKET: {market_slug}\n\n")
    warm_up_data, backtest_data = backtest_market(crypto_data_filepath, crypto_data_filename, crypto_symbol,
                                market_data_filepath, market_slug,
                                start_time_ms, end_time_ms,
                                warm_up_duration,
                                strategy,
                                time_precision=13 
                            )

    up_down_market_data[market_slug] = backtest_data[-1]
    

  0%|          | 0/400 [00:00<?, ?it/s]



STARTING WORK FOR MARKET: btc-updown-15m-1762093800




KeyboardInterrupt: 

Exception ignored in: 'zmq.backend.cython._zmq.Frame.__dealloc__'
Traceback (most recent call last):
  File "zmq/backend/cython/_zmq.py", line 179, in zmq.backend.cython._zmq._check_rc
KeyboardInterrupt: 


#### TODO -> still need to do some investigation into order book crossing and how to resolve it. It's minor things now, but I need to dive in and see where the websocket inefficiencies are coming from

In [36]:
# up_down_market_data.values()

In [37]:
bounce_groups = defaultdict(list)
for slug, backtest_data in up_down_market_data.items():
    resolved_pairs = backtest_data['resolved_pairs']

    for bounce_group in resolved_pairs:
        bounce_groups[(bounce_group.start_value, bounce_group.up_value, bounce_group.down_value)].append(bounce_group)
    

In [38]:
expected_values = []

for group_label, group_list in bounce_groups.items():

    start_val, up_val, down_val = group_label
    num_values = len(group_list)
    freq_up = 0
    freq_down = 0

    for bounce_group in group_list:
        if bounce_group.up_hit_first:
            freq_up += 1
        else:
            freq_down += 1
    
    prob_up = freq_up / num_values
    prob_down = freq_down / num_values

    group_ev = prob_up * (up_val-start_val) + prob_down * (down_val - start_val)
    print(f"Start at {start_val:.3f}. Go up to {up_val:.3f} {prob_up:.3f}%, down to {down_val:.3f} {prob_down:3f}%, ev: {group_ev:.3f}")

    expected_values.append((group_ev, group_label))

Start at 0.500. Go up to 0.600 0.773%, down to 0.005 0.227041%, ev: -0.035
Start at 0.500. Go up to 0.600 0.736%, down to 0.100 0.264484%, ev: -0.032
Start at 0.500. Go up to 0.600 0.667%, down to 0.200 0.333333%, ev: -0.033
Start at 0.500. Go up to 0.600 0.595%, down to 0.300 0.405000%, ev: -0.021
Start at 0.500. Go up to 0.600 0.463%, down to 0.400 0.537500%, ev: -0.007
Start at 0.600. Go up to 0.700 0.569%, down to 0.500 0.430921%, ev: 0.014
Start at 0.600. Go up to 0.800 0.414%, down to 0.500 0.585526%, ev: 0.024
Start at 0.600. Go up to 0.900 0.329%, down to 0.500 0.671053%, ev: 0.032
Start at 0.600. Go up to 0.995 0.282%, down to 0.500 0.717608%, ev: 0.040
Start at 0.500. Go up to 0.700 0.677%, down to 0.005 0.323077%, ev: -0.025
Start at 0.500. Go up to 0.700 0.611%, down to 0.100 0.388889%, ev: -0.033
Start at 0.500. Go up to 0.700 0.525%, down to 0.200 0.474874%, ev: -0.037
Start at 0.500. Go up to 0.700 0.454%, down to 0.300 0.546366%, ev: -0.019
Start at 0.500. Go up to 0.70

In [52]:
n = len(bounce_groups[(np.float64(0.7000000000000001), np.float64(0.995), np.float64(0.005))])
t = 0.09
n, t

(229, 0.09)

In [53]:
2*np.exp(-2*n*t**2)

np.float64(0.04896483853178021)

In [25]:
-2*n*t**2

-0.15500000000000003

In [41]:
sorted(expected_values, key = lambda x: -x[0])

[(np.float64(0.1091048034934497),
  (np.float64(0.7000000000000001), np.float64(0.995), np.float64(0.005))),
 (np.float64(0.09613207547169803),
  (np.float64(0.6000000000000001), np.float64(0.995), np.float64(0.005))),
 (np.float64(0.0860526315789473),
  (np.float64(0.8), np.float64(0.995), np.float64(0.005))),
 (np.float64(0.07314236111111105),
  (np.float64(0.6000000000000001), np.float64(0.9), np.float64(0.005))),
 (np.float64(0.07214285714285709),
  (np.float64(0.7000000000000001), np.float64(0.9), np.float64(0.005))),
 (np.float64(0.06570247933884288),
  (np.float64(0.7000000000000001), np.float64(0.995), np.float64(0.1))),
 (np.float64(0.05909326424870464),
  (np.float64(0.9), np.float64(0.995), np.float64(0.005))),
 (np.float64(0.05196347031963465),
  (np.float64(0.8), np.float64(0.995), np.float64(0.1))),
 (np.float64(0.048949999999999966),
  (np.float64(0.6000000000000001), np.float64(0.8), np.float64(0.005))),
 (np.float64(0.04841750841750833),
  (np.float64(0.600000000000000