In [1]:
import os
import glob
import dask

import numpy as np
import pandas as pd

from typing import List
from datetime import datetime
from src.preprocessing import extract_features, symmetrize_data
from src.microprice import get_micro_adjustment

In [2]:
def get_file_date(path: str) -> str:
    file_name = os.path.split(path)[1]
    next_date = file_name.split("_")[-2]
    return next_date

In [3]:
# set the path for raw and preprocessed folder 
# set the asset pair that you would like to calculate the microprice
ASSET = 'adausdt'
RAW_DATA_PATH = '/Users/mac/Desktop/Repos/FBD_Project/datasets/raw/'
PRC_DATA_PATH = '/Users/mac/Desktop/Repos/FBD_Project/datasets/processed/'

orderbook_list = sorted(glob.glob(RAW_DATA_PATH + ASSET + '/orderbook/*.csv.gz'))
# quote_list = sorted(glob.glob(RAW_DATA_PATH + ASSET + '/quotes/*.csv.gz'))
# trade_list = sorted(glob.glob(RAW_DATA_PATH + ASSET + 'btcusdt/trades/*.csv.gz'))

In [8]:
i = 10
# extract raw data
all_features = [extract_features(path) for path in orderbook_list[i-win_len:i]] 
df_feat = dask.compute(all_features)[0]
df_feat = pd.concat(df_feat)

# symmetrized data (for obtaining microprice)
df_sym = symmetrize_data(df_feat, symmetrize=True)

# get micro adjustment and save micro adjustment matrix.
df_micro = get_micro_adjustment(df_sym)
df_micro.to_csv(PRC_DATA_PATH + f'microprice/{ASSET}/micro_adjustment_{next_date}.csv')

LinAlgError: Singular matrix

In [4]:
win_len = 5

# for each window of specified length,
# process raw data to get features for calculation.
print(f'Start microprice calculation. (Asset {ASSET})')
for i in range(win_len, len(orderbook_list)):
    next_date = get_file_date(orderbook_list[i])
    print(f'Processing... (date: {next_date}, progress: {i-5}/{len(orderbook_list) - 1 - 5})')
    
    # extract raw data
    all_features = [extract_features(path) for path in orderbook_list[i-win_len:i]] 
    df_feat = dask.compute(all_features)[0]
    df_feat = pd.concat(df_feat)

    # symmetrized data (for obtaining microprice)
    df_sym = symmetrize_data(df_feat, symmetrize=True)

    # get micro adjustment and save micro adjustment matrix.
    df_micro = get_micro_adjustment(df_sym)
    df_micro.to_csv(PRC_DATA_PATH + f'microprice/{ASSET}/micro_adjustment_{next_date}.csv')

Start microprice calculation. (Asset adausdt)
Processing... (date: 2022-09-07, progress: 0/114)
                     mid_price  ba_spread  imbalance  mid_chg  next_ba_spread  \
timestamp                                                                       
2022-09-02 00:00:00    0.45725          1          4      0.0               1   
2022-09-02 00:00:01    0.45725          1          4      0.0               1   
2022-09-02 00:00:02    0.45725          1          4      0.0               1   
2022-09-02 00:00:03    0.45725          1          3      0.0               1   
2022-09-02 00:00:04    0.45725          1          3      0.0               1   

                     next_imbalance  
timestamp                            
2022-09-02 00:00:00               4  
2022-09-02 00:00:01               4  
2022-09-02 00:00:02               3  
2022-09-02 00:00:03               3  
2022-09-02 00:00:04               3  
Processing... (date: 2022-09-08, progress: 1/114)
                    

LinAlgError: Singular matrix

In [9]:
# symmetrized data (for obtaining microprice)
df_sym = symmetrize_data(df_feat, symmetrize=True)

# df sig will be used for trading signals
df_sig = symmetrize_data(df_feat, symmetrize=False)

In [22]:
# join micro price table to the df_sig.
df_sig = pd.merge(
            df_sig[['mid_price','ba_spread','imbalance']].reset_index(),
            df_micro.reset_index(),
            how='left',
            left_on=['ba_spread','imbalance'], 
            right_on=['ba_spread','imbalance']
        ).set_index('timestamp')

# micro price calculation: mid_price + g_star
df_sig['micro_price'] = df_sig['mid_price'] + df_sig['g_star'] 

In [27]:
# micro price 
df_sig[['mid_price', 'micro_price']]

Unnamed: 0_level_0,mid_price,micro_price
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-09-02 00:00:00,20122.55,20122.533317
2022-09-02 00:00:01,20122.55,20122.533317
2022-09-02 00:00:03,20120.75,20120.733317
2022-09-02 00:00:08,20124.85,20124.866683
2022-09-02 00:00:09,20124.85,20124.854311
...,...,...
2022-09-06 23:59:48,18779.95,18779.933317
2022-09-06 23:59:49,18779.95,18779.933317
2022-09-06 23:59:51,18780.55,18780.566683
2022-09-06 23:59:54,18784.35,18784.354311
