In [14]:
from IPython.display import display, HTML
# Inject a CSS rule so the notebook container uses the full page width.
full_width_css = "<style>.container { width:100% !important; }</style>"
display(HTML(full_width_css))

In [15]:
from datetime import datetime
import pandas as pd
import numpy as np
import re
# strptime-style layout used to parse TRANSACTTIME values:
# YYYYMMDDHHMMSS immediately followed by fractional seconds (%f).
# NOTE(review): the synthetic timestamps built in this notebook carry nine
# fractional digits; pandas' parser accepts that for %f — confirm on upgrade.
CME_datetime_format = "%Y%m%d%H%M%S%f"

In [16]:
# Run configuration: instrument and session date to process.
symbol = 'ESH2'
year = '2022'
month = '01'
day = '03'
datestr = year + month + day


def next_calendar_day(datestr):
    """Return the day after ``datestr`` as zero-padded ``(year, month, day)`` strings.

    Parameters
    ----------
    datestr : str
        Date in ``YYYYMMDD`` form.

    Returns
    -------
    tuple of str
        ``(YYYY, MM, DD)`` of the following calendar day. Month and year
        boundaries roll over correctly (e.g. ``20220131`` -> ``('2022', '02', '01')``).

    Notes
    -----
    This is plain calendar arithmetic; weekends and exchange holidays are
    NOT skipped.
    """
    following = pd.to_datetime(datestr, format='%Y%m%d') + pd.Timedelta(days=1)
    return following.strftime('%Y'), following.strftime('%m'), following.strftime('%d')


def mbo_dir(year, month, day):
    """Return the MBO data directory (with trailing slash) for one date."""
    return 'C:/data/CME/%s/%s/%s/MBO/XCME/' % (year, month, day)


# The next day's directory must come from real date arithmetic: bumping the
# day number alone produces e.g. "01/32" at month end.
next_year, next_month, next_day = next_calendar_day(datestr)

data_path = mbo_dir(year, month, day)
order_file = data_path + symbol + '_orders.csv'
prefix_file = data_path + symbol + '_prefix.csv'
next_prefix_out = mbo_dir(next_year, next_month, next_day) + symbol + '_prefix.csv'
suffix_file = data_path + symbol + '_suffix.csv'
print(order_file, prefix_file, next_prefix_out, suffix_file)


C:/data/CME/2022/01/03/MBO/XCME/ESM2_orders.csv C:/data/CME/2022/01/03/MBO/XCME/ESM2_prefix.csv C:/data/CME/2022/01/04/MBO/XCME/ESM2_prefix.csv C:/data/CME/2022/01/03/MBO/XCME/ESM2_suffix.csv


In [17]:
def _read_cme_csv(path):
    """Read one CME order CSV and parse its TRANSACTTIME column.

    The timestamp column is read as text and converted explicitly with
    ``CME_datetime_format``. This avoids ``read_csv(date_parser=...)``, which
    is deprecated since pandas 2.0, while giving the same parsed values.
    """
    frame = pd.read_csv(path, dtype={'TRANSACTTIME': str})
    frame['TRANSACTTIME'] = pd.to_datetime(frame['TRANSACTTIME'], format=CME_datetime_format)
    return frame


def load_lit_orders(order_file, prefix=None, suffix=None):
    """Load order messages, optionally framed by prefix/suffix files.

    Parameters
    ----------
    order_file : str
        Path to the main orders CSV.
    prefix : str, optional
        Path to a CSV whose rows are placed before the main orders.
    suffix : str, optional
        Path to a CSV whose rows are placed after the main orders.

    Returns
    -------
    pandas.DataFrame
        Indexed by parsed ``TRANSACTTIME`` and sorted by it, with columns
        ``PUBLIC_ORDER_ID`` (str), ``EXECTYPE`` ('Insert'/'Amend'/'Cancel'),
        ``SIDE`` ('Buy'/'Sell'), ``PRICE`` and ``VISIBLEQTY``. Rows sharing a
        timestamp keep their load order: prefix, then orders, then suffix,
        each in file order.
    """
    frames = []
    if prefix is not None:
        frames.append(_read_cme_csv(prefix))
    frames.append(_read_cme_csv(order_file))
    if suffix is not None:
        frames.append(_read_cme_csv(suffix))

    # ignore_index gives every row a unique position in load order; without it
    # each file restarts at 0 and equal-timestamp rows from different files
    # would be interleaved by their per-file row number.
    orders = pd.concat(frames, ignore_index=True)

    columns = ['TRANSACTTIME', 'PUBLIC_ORDER_ID', 'EXECTYPE', 'SIDE', 'PRICE', 'VISIBLEQTY']
    # mergesort is stable, so ties on TRANSACTTIME preserve load order.
    df = orders[columns].sort_values('TRANSACTTIME', kind='mergesort').set_index('TRANSACTTIME')

    # 0 -> Insert, 1 -> Amend; every other code is labelled Cancel.
    exec_labels = {0: 'Insert', 1: 'Amend'}
    df['EXECTYPE'] = df['EXECTYPE'].map(lambda code: exec_labels.get(code, 'Cancel'))
    # 0 -> Buy; every other value is labelled Sell.
    df['SIDE'] = df['SIDE'].map(lambda code: 'Buy' if code == 0 else 'Sell')
    df['PUBLIC_ORDER_ID'] = df['PUBLIC_ORDER_ID'].astype(str)
    return df

In [18]:
# Load the day's order file together with its prefix file; no suffix file is read.
total_df = load_lit_orders(order_file, prefix_file)

In [19]:
def get_unsettled_ord(total_df):
    """Return the rows of ``total_df`` whose order id never appears with a Cancel.

    An order id counts as settled as soon as any row carrying it has
    ``EXECTYPE == 'Cancel'``; every row of such an id is dropped. The rows
    that remain keep their original order and index.
    """
    order_ids = total_df['PUBLIC_ORDER_ID']
    is_cancel = total_df['EXECTYPE'] == 'Cancel'
    cancelled_ids = order_ids[is_cancel].unique()
    still_open = ~order_ids.isin(cancelled_ids)
    return total_df[still_open]


def get_prefix_suffix_ords(unsettled_df, datestr, prefix=True, symbol=None):
    """Build one synthetic record per order that is still open in ``unsettled_df``.

    Rows are replayed in order: an Insert or Amend stores the latest side,
    price and visible quantity for its order id, and a Cancel forgets the id.
    Every id left at the end yields one output row.

    Parameters
    ----------
    unsettled_df : pandas.DataFrame
        Must provide ``PUBLIC_ORDER_ID``, ``EXECTYPE`` ('Insert'/'Amend'/'Cancel'),
        ``SIDE`` ('Buy' or anything else), ``PRICE`` and ``VISIBLEQTY`` columns.
    datestr : str
        ``YYYYMMDD`` date; each output row is stamped ``datestr + '223000000000001'``.
    prefix : bool, default True
        True writes EXECTYPE 0 on every row, False writes EXECTYPE 2.
    symbol : str, optional
        Value for the SYMBOL column. When omitted, the module-level ``symbol``
        variable is used, which keeps existing three-argument calls working.

    Returns
    -------
    pandas.DataFrame
        Indexed by order id, with columns TRANSACTTIME, PUBLIC_ORDER_ID,
        SYMBOL, EXECTYPE, SIDE (0 for 'Buy', 1 otherwise), PRICE, VISIBLEQTY.
        Empty (but with these columns) when no order is left open.

    Raises
    ------
    NameError
        If ``symbol`` is not passed and no module-level ``symbol`` exists.
    """
    if symbol is None:
        # Backward compatibility with callers that rely on the notebook global.
        try:
            symbol = globals()['symbol']
        except KeyError:
            raise NameError("name 'symbol' is not defined; pass symbol=...") from None

    out_columns = ['TRANSACTTIME', 'PUBLIC_ORDER_ID', 'SYMBOL', 'EXECTYPE', 'SIDE', 'PRICE', 'VISIBLEQTY']
    dummy_exec = 0 if prefix else 2
    t_str = datestr + '223000000000001'

    # Walk the needed columns in lockstep instead of materialising a Series per row.
    fields = zip(
        unsettled_df['PUBLIC_ORDER_ID'],
        unsettled_df['EXECTYPE'],
        unsettled_df['SIDE'],
        unsettled_df['PRICE'],
        unsettled_df['VISIBLEQTY'],
    )
    open_orders = {}
    for order_id, exec_type, side, price, qty in fields:
        if exec_type in ('Insert', 'Amend'):
            # Later rows overwrite earlier ones, so the last state wins.
            side_code = 0 if side == 'Buy' else 1
            open_orders[order_id] = (t_str, order_id, symbol, dummy_exec, side_code, price, qty)
        elif exec_type == 'Cancel':
            open_orders.pop(order_id, None)

    # One row per surviving order id, in first-seen order.
    return pd.DataFrame.from_dict(open_orders, orient='index', columns=out_columns)

In [20]:
%%time
unsettled_df = get_unsettled_ord(total_df)


Wall time: 297 ms


In [24]:
# Still-open orders become EXECTYPE-0 rows written to the next day's prefix file.
prefix_ord = get_prefix_suffix_ords(unsettled_df, datestr, prefix=True)
prefix_ord.to_csv(path_or_buf=next_prefix_out, index=False)

In [25]:
# The same open orders become EXECTYPE-2 rows written to this day's suffix file.
suffix_ord = get_prefix_suffix_ords(unsettled_df, datestr, prefix=False)
suffix_ord.to_csv(path_or_buf=suffix_file, index=False)