# Data Preprocessing for Training

## Trading Bot Settings

In [1]:
import os
from config import TRANSACT_FEE_RATE, STOP_LOSS_RATE, TRAILING_RATE

# Pin the directory the notebook was launched from the first time this cell
# runs. Without the guard, every re-run would climb one more directory level
# and silently move `data_dpath` along with it.
if "NOTEBOOK_DPATH" not in globals():
    NOTEBOOK_DPATH = os.getcwd()

# Directory holding the pickled data pipeline and the generated datasets.
data_dpath = os.path.join(NOTEBOOK_DPATH, "data")

# Work from the parent directory of the notebook (presumably so that the
# `src.*` imports in the following cells resolve -- confirm against the repo layout).
os.chdir(os.path.join(NOTEBOOK_DPATH, os.path.pardir))

In [2]:
from src.market import MarketBase
from src.market.bitfinex import BitfinexSpot
from src.market_actor import MarketActorStub
from src.market_listener import MarketListenerStub
from src.advance_order.convertible_stop_loss import ConvertibleStopLoss

# Simulation environment for the BTC/USD spot market.
# NOTE(review): the *Stub actor and listener look like offline stand-ins for the
# live exchange connection -- confirm against src.market_actor / src.market_listener.
market = BitfinexSpot("BTC", "USD")
# Fee rate comes from config; echo_mode=False presumably silences per-trade logging -- TODO confirm.
market_actor = MarketActorStub(TRANSACT_FEE_RATE, echo_mode=False)
# The listener's current price is set manually during the signal simulations below.
market_listener = MarketListenerStub(market)

## Loading Data Pipeline Image

In [3]:
import pickle

# NOTE: pickle.load can execute arbitrary code -- only load pipeline files
# that were produced by this project.
pipeline_fpath = os.path.join(data_dpath, "data_pipeline.pickle")

with open(pipeline_fpath, "rb") as file:
    data_pipeline = pickle.load(file)

# Pull out the two pipeline components used by the rest of the notebook.
# `.get` yields None (rather than raising) when a key is absent.
candle_buffers = data_pipeline.get("candle_buffers")
features_compiler = data_pipeline.get("features_compiler")

## Fetching Candle History

In [4]:
import asyncio
import time
import numpy as np
import pandas as pd

from bfxapi import Client

async def get_candles(bfx: Client, market: MarketBase, epochs: int = 100,
    frame_resolution: str = "1m") -> pd.DataFrame:
    """Download historical candles in `epochs` concurrent batches.

    Each batch asks for up to 10,000 candles ending at a window boundary; the
    boundaries step backwards from "now" (truncated to the minute) by 10,000
    frames of `frame_resolution` per epoch.

    Args:
        bfx: Client whose `rest.get_public_candles` coroutine is awaited.
        market: Market whose `get_ticker()` supplies the request symbol.
        epochs: Number of batches to request.
        frame_resolution: Candle time frame passed through as `tf`. Unknown
            values (and the variable-length "1M") fall back to a one-minute
            step; the resulting window overlap is removed by de-duplication.

    Returns:
        DataFrame indexed by unique, ascending "Timestamp" (seconds) with the
        columns Open, Close, High, Low and Volume. Empty when nothing is
        returned by the API.
    """
    columns = ["Timestamp", "Open", "Close", "High", "Low", "Volume"]
    batch_limit = 10000 # Candles requested per call

    # Length of one candle in ms, so that the window step follows the requested
    # resolution instead of always assuming one-minute candles.
    frame_ms = {
        "1m": 60_000, "5m": 300_000, "15m": 900_000, "30m": 1_800_000,
        "1h": 3_600_000, "3h": 10_800_000, "6h": 21_600_000, "12h": 43_200_000,
        "1D": 86_400_000, "1W": 604_800_000, "14D": 1_209_600_000,
    }.get(frame_resolution, 60_000)

    end = int(time.time())
    end -= (end % 60) # Truncate to minutes
    end *= 1000 # Convert to ms

    # Skip windows that would end before the epoch (possible with coarse frames).
    window_ends = [end - epoch * batch_limit * frame_ms for epoch in range(epochs)]
    window_ends = [window_end for window_end in window_ends if window_end > 0]

    batches = await asyncio.gather(*[
        bfx.rest.get_public_candles(symbol=market.get_ticker(), start=0,
                end=window_end, limit=batch_limit, tf=frame_resolution)
        for window_end in window_ends
    ])

    # An empty batch (window older than the available history) has a different
    # rank than a populated one and would make np.concatenate raise.
    batches = [batch for batch in batches if len(batch) > 0]
    if not batches:
        return pd.DataFrame(columns=columns).set_index("Timestamp")

    candles = pd.DataFrame(np.concatenate(batches), columns=columns)
    candles["Timestamp"] = (candles["Timestamp"] // 1000).astype(int) # Convert from ms to seconds
    candles = candles.set_index("Timestamp")

    # A batch containing gaps reaches further back than its nominal window and
    # overlaps the next one; keep a single row per timestamp.
    candles = candles[~candles.index.duplicated(keep="first")]

    return candles.sort_index()

In [5]:
candles = await get_candles(Client(), market, epochs=300)

candles

Unnamed: 0_level_0,Open,Close,High,Low,Volume
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1668580440,16819.0,16817.0,16820.0,16810.0,3.552167
1668580500,16814.0,16811.0,16818.0,16811.0,3.672976
1668580560,16810.0,16799.0,16810.0,16798.0,6.654021
1668580620,16800.0,16814.0,16814.0,16800.0,3.114102
1668580680,16814.0,16828.0,16828.0,16814.0,3.258646
...,...,...,...,...,...
1669183140,16458.0,16463.0,16466.0,16458.0,2.937683
1669183200,16462.0,16469.0,16469.0,16462.0,1.087184
1669183260,16474.0,16475.0,16475.0,16473.0,0.671472
1669183320,16475.0,16472.0,16476.0,16472.0,5.760059


## Generating Observations

In [6]:
from tqdm import tqdm
from src.candle_buffer import CandleBuffer
from src.indicator import FeaturesCompiler
from src.tradebook import Tradebook

def extract_observations(candle_buffers: dict[int, CandleBuffer], features_compiler: FeaturesCompiler,
    candles: pd.DataFrame) -> tuple[np.ndarray, np.ndarray]:
    """Replay candle history through the buffers and record one feature vector per candle.

    Every candle is fed to each buffer as four price updates (Open, High, Low,
    Close). Recording starts once every buffer reports size == capacity and
    continues for all remaining candles.

    Note that `candle_buffers` is updated in place, so calling this twice on
    the same buffers does not start from the same state.

    Args:
        candle_buffers: Buffers to feed; all of them must fill up before
            observations are recorded.
        features_compiler: Object whose `get()` returns the current feature vector.
        candles: Frame indexed by timestamp with Open/High/Low/Close/Volume columns.

    Returns:
        `(timestamps, observations)`: an int array of the candle timestamps
        that produced an observation and a float array with one row per
        recorded feature vector.
    """
    tradebook = Tradebook(10) # To facilitate update of orderbook
    timestamps, observations = [], []
    buffer_ready = False

    # Stream the rows; `total` keeps the progress bar without copying every row into a list.
    for timestamp, data in tqdm(candles.iterrows(), total=len(candles)):
        update_timestamp = timestamp - 1 # The passed timestamp denotes the end of time frame
        tradebook.append_trade(update_timestamp, (data.get("Open") + data.get("Close")) / 2,
                data.get("Volume"))

        for candle_buffer in candle_buffers.values():
            candle_buffer.update(update_timestamp, data.get("Open"), tradebook)
            candle_buffer.update(update_timestamp, data.get("High"), tradebook)
            candle_buffer.update(update_timestamp, data.get("Low"), tradebook)
            candle_buffer.update(update_timestamp, data.get("Close"), tradebook)

        if not buffer_ready:
            # Ready only when there is at least one buffer and all of them are full;
            # once set, the flag is never cleared.
            buffer_ready = bool(candle_buffers) and all(
                candle_buffer.get_size() == candle_buffer.get_capacity()
                for candle_buffer in candle_buffers.values()
            )

        if buffer_ready:
            timestamps.append(timestamp)
            observations.append(features_compiler.get().copy()) # Create a copy from the memoryview

    return np.array(timestamps, dtype=int), np.array(observations, dtype=float)

In [7]:
# Replay the candle history to build the observation matrix.
# Note: extract_observations calls `update` on every buffer, so `candle_buffers`
# is modified by this cell; reload the pickle before re-running it.
obs_timestamps, observations = extract_observations(candle_buffers, features_compiler, candles)

100%|██████████| 10000/10000 [00:38<00:00, 259.18it/s]


In [8]:
# Column 0 of the observations is the CryptoFearAndGreed feature (features[0]).
# The replay yields the same value in every row shown below, so this column
# has to be replaced with historical readings in the next cell.
observations[:, 0]

array([-0.56, -0.56, -0.56, ..., -0.56, -0.56, -0.56])

In [9]:
from src.indicator.fear_and_greed import CryptoFearAndGreed

# Historical index readings as two parallel arrays.
# NOTE(review): the cursor walk below only makes sense if both timestamp arrays
# are in ascending order -- confirm for CryptoFearAndGreed.get_hist_data().
fng_timestamps, fng_values = CryptoFearAndGreed.get_hist_data()
last_fng_index = fng_timestamps.shape[0] - 1
fng_cursor = 0

for row, obs_time in enumerate(obs_timestamps):
    # Move the cursor forward while the next reading is still strictly older
    # than the observation; the cursor never moves backwards.
    while fng_cursor != last_fng_index:
        if obs_time > fng_timestamps[fng_cursor + 1]:
            fng_cursor += 1
        else:
            break

    # Overwrite the placeholder feature with the reading selected above.
    observations[row, 0] = fng_values[fng_cursor]

observations[:, 0]

array([-0.54, -0.54, -0.54, ..., -0.56, -0.56, -0.56])

## Truncating Candles to the Number of Observations

In [10]:
# Keep only the trailing candles that produced an observation (the leading ones
# were consumed while the candle buffers were filling up).
# An explicit start position is used because `candles[-0:]` would keep every
# candle when there are no observations at all.
n_observations = observations.shape[0]
candles = candles.iloc[len(candles) - n_observations:]

assert len(candles) == n_observations, "candles and observations are misaligned"

## Generating Outputs (Trading Signals)

In [11]:
# Plain arrays of the price columns for fast positional access in the simulations.
close_values, high_values, low_values = (
    candles[column].values for column in ("Close", "High", "Low")
)

In [12]:
# Label each candle with the outcome of a simulated long trade opened at its close.
long_signals = []
n_candles = close_values.shape[0]

for entry_index in tqdm(range(n_candles)):
    entry_price = close_values[entry_index]
    position = market_actor.open_position(market, entry_price, size=1)

    advance_order = ConvertibleStopLoss(
        position, market_actor, market_listener, entry_price,
        STOP_LOSS_RATE, TRAILING_RATE, stop_loss_as_rate=True
    )

    # Replay the following candles until the order reports itself as filled.
    for step in range(entry_index + 1, n_candles):
        # Simulate unfavourable development by updating low before high
        for price in (low_values[step], high_values[step], close_values[step]):
            market_listener.set_current_price(price)
            advance_order.update()

        if advance_order.filled:
            break

    # An order still open when the data runs out cannot be labelled. Labelling
    # stops at the first such entry, so the result has fewer rows than candles
    # and the progress bar ends short of 100%.
    if not advance_order.filled:
        break

    # Signal is 1 when the closed position ends with a positive USD balance.
    long_signals.append(int(advance_order.position.balances.get("USD") > 0))

long_signals = np.array(long_signals, dtype=int)
long_signals.sum() / long_signals.shape[0] * 100 # Percentage of signals

 97%|█████████▋| 6907/7095 [00:09<00:00, 740.52it/s] 


30.592152888374113

In [13]:
# Label each candle with the outcome of a simulated short trade opened at its close.
short_signals = []
n_candles = close_values.shape[0]

for entry_index in tqdm(range(n_candles)):
    entry_price = close_values[entry_index]
    position = market_actor.open_position(market, entry_price, size=-1)

    advance_order = ConvertibleStopLoss(
        position, market_actor, market_listener, entry_price,
        STOP_LOSS_RATE, TRAILING_RATE, stop_loss_as_rate=True
    )

    # Replay the following candles until the order reports itself as filled.
    for step in range(entry_index + 1, n_candles):
        # Simulate unfavourable development by updating high before low
        for price in (high_values[step], low_values[step], close_values[step]):
            market_listener.set_current_price(price)
            advance_order.update()

        if advance_order.filled:
            break

    # An order still open when the data runs out cannot be labelled. Labelling
    # stops at the first such entry, so the result has fewer rows than candles
    # and the progress bar ends short of 100%.
    if not advance_order.filled:
        break

    # Signal is 1 when the closed position ends with a positive USD balance.
    short_signals.append(int(advance_order.position.balances.get("USD") > 0))

short_signals = np.array(short_signals, dtype=int)
short_signals.sum() / short_signals.shape[0] * 100 # Percentage of signals

 97%|█████████▋| 6878/7095 [00:10<00:00, 661.21it/s] 


28.656586216923525

## Saving Datasets

In [14]:
# Persist the feature matrix and both label vectors next to the pipeline pickle.
# NOTE(review): the arrays are written as-is. Per the progress bars above, the
# signal arrays stop early and are shorter than `observations`; confirm that
# the training code aligns them.
for fname, dataset in (
    ("observations.npy", observations),
    ("long_signals.npy", long_signals),
    ("short_signals.npy", short_signals),
):
    np.save(os.path.join(data_dpath, fname), dataset)