In [1]:
import os 
import sys 
import csv 
import yaml
import numpy as np 
import pandas as pd 
from tqdm import tqdm 
import datetime 

In [2]:
# Fixed constants for this notebook; user inputs are loaded further down in this cell.

# Fixed +05:30 UTC offset (330 minutes).
IST = datetime.timezone(datetime.timedelta(minutes=330))

# Level-1 book columns; the only level that carries the hidden-quantity flags.
_level1_cols = [
    'bp1', 'bq1', 'boc1', 'has_hidden_qty_bid',
    'ap1', 'aq1', 'aoc1', 'has_hidden_qty_ask',
]

# Levels 2 through 5 repeat the same six stems in the same order.
_deep_level_cols = [
    f'{stem}{depth}'
    for depth in range(2, 6)
    for stem in ('bp', 'bq', 'boc', 'ap', 'aq', 'aoc')
]

# Every column of the raw log, in file order (list index == file position).
header_list = [
    'trading_date', 'data_source', 'stream_id',                                # 0-2
    'exchange_epoch_nanos', 'server_epoch_nanos', 'capture_epoch_nanos',       # 3-5
    'contract_id', 'venue_token', 'contract_name', 'market_segment_id',        # 6-9
    'symbol', 'contract_type', 'strike_price', 'expiry date',                  # 10-13
    'display_factor', 'handle_value', 'currency_unit',                         # 14-16
    'is_touchline_change', 'feed_message_id', 'inpacket_sequence_num',         # 17-19
    'is_last_in_feed_message', 'event_type', 'packet_sequence_num',            # 20-22
    'contract_sequence_num', 'is_implied', 'transact_epoch_nanos',             # 23-25
    *_level1_cols,                                                             # 26-33
    *_deep_level_cols,                                                         # 34-57
    'implied_bp1', 'implied_bq1', 'implied_ap1', 'implied_aq1',                # 58-61
    'implied_bp2', 'implied_bq2', 'implied_ap2', 'implied_aq2',                # 62-65
    'previous_mid', 'mid', 'is_mid_change', 'weighted_mid',                    # 66-69
    'contract_status',                                                         # 70
    'side', 'price', 'qty', 'order_count', 'qpos',                             # 71-75
    'old_qpos', 'oid1', 'oid2', 'priority', 'old_price', 'old_qty',            # 76-81
    'aggressor_type', 'qty_in_last_trade', 'level',                            # 82-84
    'capture_ts timestamptz',                                                  # 85
]

# Subset of header_list that is actually loaded, listed in file order.
header_list_to_keep = [
    'exchange_epoch_nanos', 'server_epoch_nanos',
    'contract_name', 'symbol', 'expiry date',
    'is_touchline_change', 'feed_message_id', 'inpacket_sequence_num',
    'is_last_in_feed_message', 'event_type', 'transact_epoch_nanos',
    *_level1_cols,
    *_deep_level_cols,
    'previous_mid', 'mid', 'is_mid_change', 'weighted_mid',
    'side', 'price', 'qty', 'order_count', 'qpos',
    'old_qpos', 'oid1', 'oid2', 'priority', 'old_price', 'old_qty',
    'aggressor_type', 'qty_in_last_trade', 'level',
]

# Candidate columns for a distinct-value inspection.
cols_to_check = [
    'market_segment_id', 'contract_type',
    'display_factor', 'handle_value',
    'is_touchline_change', 'inpacket_sequence_num',
    'is_last_in_feed_message', 'contract_sequence_num',
    'is_implied', 'contract_status',
    'qpos', 'old_qpos',
]

# Column names grouped under the "ns" label.
ns_cols = [
    'exchange_epoch_nanos',
    'server_epoch_nanos',
    'capture_epoch_nanos',
    'transact_epoch_nanos',
    'priority',
]

# Load the run parameters supplied by the user.
with open('user_input.yaml', 'r') as f:
    inputs = yaml.safe_load(f)

# Fail here with a clear message rather than deep inside a later cell:
# the cells below read inputs['parent_dir'], inputs['date'] and inputs['chunk_size'].
if not isinstance(inputs, dict):
    raise TypeError(
        f"user_input.yaml must contain a mapping, got {type(inputs).__name__}"
    )
_missing_inputs = {'parent_dir', 'date', 'chunk_size'}.difference(inputs)
if _missing_inputs:
    raise KeyError(f"user_input.yaml is missing keys: {sorted(_missing_inputs)}")

# Zero-based file positions of the columns to load.
usecols = [header_list.index(col) for col in header_list_to_keep]

# pandas.read_csv ignores the order of `usecols` and hands out `names`
# positionally, so the kept columns must be listed in strictly increasing file
# order; otherwise the loaded columns would be silently mislabelled.
if any(later <= earlier for earlier, later in zip(usecols, usecols[1:])):
    raise ValueError(
        "header_list_to_keep must follow header_list order without duplicates"
    )

In [3]:
# Inspect the zero-based file positions resolved for header_list_to_keep.
usecols

[3,
 4,
 8,
 10,
 13,
 17,
 18,
 19,
 20,
 21,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 66,
 67,
 68,
 69,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84]

In [4]:
# path to obo file :
# str() guards against YAML parsing an unquoted date such as 20250519 as an int,
# which os.path.join would reject.
date_dir = os.path.join(inputs['parent_dir'], str(inputs['date']))

# os.listdir returns entries in arbitrary order; sort so that the same file is
# picked on every run when several names start with 'obo'.
obo_files = sorted(name for name in os.listdir(date_dir) if name.startswith('obo'))
if not obo_files:
    # Without this check file_path would stay undefined, or keep a stale value
    # from an earlier run of this cell.
    raise FileNotFoundError(f"no file starting with 'obo' found in {date_dir}")

file_path = os.path.join(date_dir, obo_files[0])

file_path 

'/data/ashmit_data/20250519/obo_20250519.log'

In [5]:
def get_iter():
    """Open the log file as a fresh chunked CSV reader.

    Reads the notebook-level ``file_path``, ``header_list_to_keep``,
    ``usecols`` and ``inputs['chunk_size']``. The file has no header row;
    only the positions in ``usecols`` are loaded and they are labelled with
    ``header_list_to_keep``.

    Returns:
        The chunked reader returned by ``pandas.read_csv``; iterating it
        yields DataFrames of at most ``inputs['chunk_size']`` rows.
    """
    return pd.read_csv(
        file_path,
        header=None,
        names=header_list_to_keep,
        usecols=usecols,
        chunksize=inputs['chunk_size'],
        # Keyed by column name instead of the raw file position 82 (which is
        # 'aggressor_type' in header_list) so the string dtype keeps applying
        # to the intended column if the header lists ever change.
        dtype={'aggressor_type': str},
    )

In [6]:
def get_unique(cols_to_check):
    """Print the sorted distinct non-null values of each requested column.

    Streams the whole file through ``get_iter()`` once and prints one line per
    column in the form ``for <col> : <sorted unique values>``.

    Args:
        cols_to_check: Iterable of column names; each must be among the
            columns produced by ``get_iter()``.

    Raises:
        KeyError: If a requested column is not present in a chunk.
    """
    # One set per column: memory stays proportional to the number of distinct
    # values instead of growing with every chunk's repeated values.
    seen = {col: set() for col in cols_to_check}
    for chunk in get_iter():
        for col, values in seen.items():
            # Drop nulls on the single Series rather than copying the whole
            # chunk once per column.
            values.update(chunk[col].dropna().unique().tolist())
    for col, values in seen.items():
        print(f'for {col} : {np.unique(list(values))}')

In [7]:
def convert_ns_to_time(time_ns, tz=None):
    """Format an epoch timestamp in nanoseconds as ``HH:MM:SS.nnnnnnnnn``.

    Args:
        time_ns: Nanoseconds since the Unix epoch (Python int, NumPy integer,
            or a float holding an integral value).
        tz: ``datetime.tzinfo`` used for the wall-clock part. Defaults to the
            notebook-level ``IST`` offset.

    Returns:
        str: Time of day in ``tz`` followed by exactly nine fractional digits.

    Raises:
        ValueError: If ``time_ns`` is NaN.
    """
    if tz is None:
        tz = IST
    # Integer divmod keeps every digit exact. Dividing by 1e9 in float64 rounds
    # values such as ...999999999 up to the next whole second, which made the
    # seconds field disagree with the fractional part.
    seconds, nanos = divmod(int(time_ns), 1_000_000_000)
    clock = datetime.datetime.fromtimestamp(seconds, tz=tz)
    return f"{clock:%H:%M:%S}.{nanos:09d}"

In [8]:
# Spot-check the formatter on one sample epoch-nanosecond value.
convert_ns_to_time( 1747648988764685036 )

'15:33:08.764685036'

In [9]:
# get_unique(['event_type' , 'qpos' , 'level'])

In [14]:
# Write the first chunk to temp2.csv with missing values shown as '--'.
# The reader is closed explicitly so the file handle is not left open after
# only one chunk has been consumed.
reader = get_iter()
try:
    first_chunk = next(reader)
finally:
    reader.close()
first_chunk.fillna('--').to_csv('temp2.csv', index=False)