In [1]:
# Initialise constants

events_length = {
    "S": 12,
    "R": 39,
    "H": 25,
    "Y": 20,
    "L": 26,
    "V": 35,
    "W": 12,
    "K": 28,
    "J": 35,
    "h": 21,
    "A": 36, # Add order
    "F": 40, # Add order
    "E": 31, # Order executed
    "C": 36, # Order executed with price
    "X": 23,
    "D": 19,
    "U": 35, # Order replace msg
    "P": 44, # Trade msg
    "Q": 40, # Trade msg
    "B": 19,
    "I": 50,
    "N": 20,
    "O": 48 
}

msg_formats = {
    "E": {  # Order executed
        "format_str": ">HH6sQIQ",
        "time_stmp": 2,
        "ord_ref_num": 3,
        "shares": 4
    },
    "C": {  # Order executed with price
        "format_str": ">HH6sQIQcI", 
        "time_stmp": 2,
        "ord_ref_num": 3,
        "shares": 4,
        "printable": 6,
        "price": 7
    },
    "P": {  # Non cross for hidden orders
        "format_str": ">HH6sQcI8sIQ",
        "time_stmp": 2,
        "shares": 5,
        "stock": 6,
        "price": 7
    },
    "Q": {  # Cross trades
        "format_str": ">HH6sQ8sIQc",
        "time_stmp": 2,
        "shares": 3,
        "stock": 4,
        "price": 5
    },
    "A": {  # Add Order
        "format_str": ">HH6sQcI8sI",
        "time_stmp": 2,
        "ord_ref_num": 3,
        "shares": 5,
        "stock": 6,
        "price": 7
    },
    "F": {  # Add Order with MPID
        "format_str": ">HH6sQcI8sI4s",
        "time_stmp": 2,
        "ord_ref_num": 3,
        "shares": 5,
        "stock": 6,
        "price": 7
    },
    "U": {  # Replace Order
        "format_str": ">HH6sQQII",
        "time_stmp": 2,
        "old_ord_ref_num": 3,
        "ord_ref_num": 4,
        "shares": 5,
        "price": 6
    },
}

In [2]:
# Define functions

import struct

def calc_vwap(msg_type, data):
    value = struct.unpack(msg_formats[msg_type]["format_str"], data)
        
    shares = value[msg_formats[msg_type]["shares"]]
    if "ord_ref_num" in msg_formats[msg_type]:
        ord_ref_num = value[msg_formats[msg_type]["ord_ref_num"]]

    if "stock" in msg_formats[msg_type]:
        stock = value[msg_formats[msg_type]["stock"]].decode('ascii')
    else:
        stock = order_book[ord_ref_num]["stock"]

    if "price" in msg_formats[msg_type]:
        price = value[msg_formats[msg_type]["price"]]/10000
    else:
        price = order_book[ord_ref_num]["price"]     

    
    if msg_type in {"E", "C"}:
        order_book[ord_ref_num]["shares"] -= shares
        if order_book[ord_ref_num]["shares"] == 0:
            del order_book[ord_ref_num]
    
    if msg_type == "C" and value[msg_formats[msg_type]["printable"]].decode('ascii')=="N":
        """According to section 1.4.2 of ITCH specification non-printable
        should be ignored for volume calculations"""
        return None

    
    if stock in vwap:
        vwap[stock]["PV"] += price*shares
        vwap[stock]["V"] += shares
    else:
        vwap[stock] = {
            "PV": price*shares , 
            "V": shares
        } 
        

def update_order_book(msg_type, data):
    value = struct.unpack(msg_formats[msg_type]["format_str"], data)
    ord_ref_num = value[msg_formats[msg_type]["ord_ref_num"]]

    if "stock" in msg_formats[msg_type]:
        stock = value[msg_formats[msg_type]["stock"]].decode('ascii')
    else:
        old_ord_ref_num = value[msg_formats[msg_type]["old_ord_ref_num"]]
        stock = order_book[old_ord_ref_num]["stock"]
        del order_book[old_ord_ref_num]

    
    shares = value[msg_formats[msg_type]["shares"]]
    price = value[msg_formats[msg_type]["price"]]/10000

    order_book[ord_ref_num] = {
        "stock": stock,
        "price": price,
        "shares": shares
    }


In [9]:
order_book = {}
vwap = {}

In [None]:
# Run main code

from sys import getsizeof
with open('../01302019_NASDAQ_ITCH50', mode="rb") as file:
    count = 0
    while True:
        file.seek(2, 1)
        data = file.read(1)
        msg_type = data.decode('ascii')

        if msg_type=="":
            break

        if count % 10000000 == 0:
            print(msg_type)
            print(f"order book, {len(order_book):_}, {getsizeof(order_book):_}")
            print(f"vwap, {len(vwap):_}, {getsizeof(vwap):_}")
            # print(order_book)
            # print(vwap)
        
        # if count == 100000000:
        #     break
        count+=1
        
        if msg_type in {"E", "C", "P", "Q"}:
            data2 = file.read(events_length[msg_type]-1)
            calc_vwap(msg_type, data2)
        elif msg_type in {"A", "F", "U"}:
            data2 = file.read(events_length[msg_type]-1)
            update_order_book(msg_type, data2)    
        else:
            file.seek(events_length[msg_type]-1, 1)

S
order book, 0, 64
vwap, 0, 64
I
order book, 3_886_922, 167_772_248
vwap, 943, 26_032
A
order book, 8_605_501, 335_544_400
vwap, 8_713, 207_616
D
order book, 12_953_046, 671_088_728
vwap, 8_713, 207_616
A
order book, 17_275_169, 671_088_728
vwap, 8_713, 207_616
A
order book, 21_611_775, 1_342_177_360
vwap, 8_713, 207_616
A
order book, 25_961_803, 1_342_177_360
vwap, 8_713, 207_616
D
order book, 30_310_811, 1_342_177_360
vwap, 8_713, 207_616
A
order book, 34_644_642, 1_342_177_360
vwap, 8_713, 207_616
D
order book, 39_000_312, 1_342_177_360
vwap, 8_713, 207_616
U
order book, 43_358_356, 2_684_354_648
vwap, 8_713, 207_616
U
order book, 47_716_433, 2_684_354_648
vwap, 8_713, 207_616
U
order book, 52_111_792, 2_684_354_648
vwap, 8_713, 207_616
D
order book, 56_489_374, 2_684_354_648
vwap, 8_713, 207_616
A
order book, 60_895_263, 2_684_354_648
vwap, 8_713, 207_616
A
order book, 65_315_399, 2_684_354_648
vwap, 8_713, 207_616
D
order book, 69_740_159, 2_684_354_648
vwap, 8_713, 207_616
D
ord

In [6]:
order_book

{9397: {'stock': 'ASML    ', 'price': 174.97, 'shares': 1500},
 9489: {'stock': 'ASML    ', 'price': 176.15, 'shares': 500},
 92: {'stock': 'SPY     ', 'price': 264.57, 'shares': 500},
 9649: {'stock': 'CRH     ', 'price': 28.74, 'shares': 1500},
 96: {'stock': 'SPY     ', 'price': 263.62, 'shares': 500},
 9697: {'stock': 'CRH     ', 'price': 28.73, 'shares': 1500},
 98: {'stock': 'EWJ     ', 'price': 53.67, 'shares': 1900},
 110: {'stock': 'EWJ     ', 'price': 53.95, 'shares': 1900},
 114: {'stock': 'GDX     ', 'price': 21.97, 'shares': 2000},
 122: {'stock': 'GDX     ', 'price': 22.19, 'shares': 1000},
 9789: {'stock': 'CRH     ', 'price': 28.82, 'shares': 1500},
 9841: {'stock': 'CRH     ', 'price': 28.83, 'shares': 1500},
 9897: {'stock': 'CRH     ', 'price': 28.84, 'shares': 1500},
 124: {'stock': 'XLB     ', 'price': 53.75, 'shares': 400},
 128: {'stock': 'XLB     ', 'price': 53.42, 'shares': 400},
 140: {'stock': 'XLI     ', 'price': 70.24, 'shares': 300},
 144: {'stock': 'XLI  

In [7]:
vwap

{'XLV     ': {'PV': 63307965.89500001, 'V': 713704},
 'TVIX    ': {'PV': 32541938.110000066, 'V': 703372},
 'AAPL    ': {'PV': 786377884.6850009, 'V': 4851443},
 'DPW     ': {'PV': 14003.50479999999, 'V': 149699},
 'UGAZ    ': {'PV': 13255489.980000013, 'V': 343538},
 'HMY     ': {'PV': 250820.47, 'V': 128368},
 'GOLD    ': {'PV': 4660156.524999993, 'V': 362501},
 'DRD     ': {'PV': 5899.95, 'V': 2705},
 'QCOM    ': {'PV': 69063437.44499993, 'V': 1386956},
 'SPY     ': {'PV': 959369564.1900054, 'V': 3623429},
 'UPRO    ': {'PV': 5447955.5200000005, 'V': 133725},
 'AMD     ': {'PV': 432484190.9050002, 'V': 19738589},
 'TEF     ': {'PV': 495001.8599999999, 'V': 57345},
 'BX      ': {'PV': 6344655.760000003, 'V': 192211},
 'FB      ': {'PV': 333052121.17500097, 'V': 2271844},
 'AMZN    ': {'PV': 893328914.5650063, 'V': 546498},
 'GOOG    ': {'PV': 87005794.00000012, 'V': 81206},
 'ALGN    ': {'PV': 97991843.24999996, 'V': 453206},
 'OCX     ': {'PV': 2036316.5800000005, 'V': 382544},
 'AU