<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Observe-the-data" data-toc-modified-id="Observe-the-data-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Observe the data</a></span><ul class="toc-item"><li><span><a href="#Operation-types:" data-toc-modified-id="Operation-types:-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Operation types:</a></span></li></ul></li><li><span><a href="#Make-daily-orderbook-for-6-instruments" data-toc-modified-id="Make-daily-orderbook-for-6-instruments-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Make daily orderbook for 6 instruments</a></span></li><li><span><a href="#Check-whether-orderbooks-have-collisions" data-toc-modified-id="Check-whether-orderbooks-have-collisions-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Check whether orderbooks have collisions</a></span></li><li><span><a href="#Result" data-toc-modified-id="Result-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Result</a></span></li></ul></div>

### Observe the data

- NO-record number
- SECCODE-instrument code
- BUYSELL-buy/sell(B/S)
- TIME-timestamp HHMMSSZZZXXX format
- ORDERNO-order number; what exactly is considered an order?
- ACTION-type of order 0-revoke, 1-post, 2-match
- PRICE-price of the order
- VOLUME-the volume of the order
- TRADENO-trade number
- TRADEPRICE-price of the deal

TODO: 
- define instrument
- types of instruments:
-    USD000000TOD
-    USD000UTSTOM
-    EUR_RUB__TOD
-    EUR_RUB__TOM
-    EURUSD000TOD
-    EURUSD000TOM

#### Operation types:
- ADD:
    - post the bid/ask
- DELETE:
    - revoke the bid/ask
    - replace the bid/ask passively(delete then add)
    - replace the bid/ask aggressively(delete)
    - the bid/ask was satisfied
- CHANGE:
    - quantity of the bid/ask was changed by system(the bid/ask partially satisfied)

### Make daily orderbook for 6 instruments
- USD000000TOD
- USD000UTSTOM
- EUR_RUB__TOD
- EUR_RUB__TOM
- EURUSD000TOD
- EURUSD000TOM

In [1]:
%load_ext Cython
%load_ext line_profiler

In [44]:
%%cython -3
# %%time
cimport cython
cimport numpy as np
import numpy as np



# Instruments for which an order book is built, keyed by SECCODE.
# 'SCHEDULE' is a cutoff compared against the TIME column in row_scanner:
# only rows whose timestamp is strictly below it are applied to the books.
# The values use the same HHMMSSZZZXXX layout as TIME
# (e.g. 174500000000 is 17:45:00).
cdef dict instruments_info = {'USD000000TOD': {'SCHEDULE': 174500000000},
                              'USD000UTSTOM': {'SCHEDULE': 235000000000},
                              'EUR_RUB__TOD': {'SCHEDULE': 150000000000},
                              'EUR_RUB__TOM': {'SCHEDULE': 235000000000},
                              'EURUSD000TOM': {'SCHEDULE': 235000000000},
                              'EURUSD000TOD': {'SCHEDULE': 150000000000}}


@cython.wraparound(False)
@cython.boundscheck(False)
cdef inline delete_offer(dict offers, str seccode, double price, long volume):
    # Remove `volume` from the `price` level of the `seccode` book.
    #
    # offers  -- mapping seccode -> {price: resting volume}
    # seccode -- instrument code; must already be a key of `offers`
    # price   -- price level to reduce
    # volume  -- amount to take off that level
    #
    # A level that is not in the book is ignored; a level whose remaining
    # volume would drop to zero or below is removed entirely.
    #
    # `price` is a C double (not `float`): a C float is single precision and
    # would round the price before it is used as a dict key. add_offer uses
    # the same type so both helpers address identical keys.
    book = offers[seccode]
    if price not in book:
        return

    remaining = book[price] - volume
    if remaining <= 0:
        del book[price]
    else:
        book[price] = remaining


@cython.wraparound(False)
@cython.boundscheck(False)
cdef inline add_offer(dict offers, str seccode, double price, long volume):
    # Add `volume` to the `price` level of the `seccode` book, creating the
    # level when it does not exist yet.
    #
    # offers  -- mapping seccode -> {price: resting volume}
    # seccode -- instrument code; must already be a key of `offers`
    # price   -- price level to increase (C double, see delete_offer)
    # volume  -- amount to add to that level
    book = offers[seccode]
    book[price] = book.get(price, 0) + volume
        
        
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef row_scanner(dict bids,
                  dict asks,
                  np.ndarray [np.int_t, ndim=1] timestamp,
                  np.ndarray [np.int_t, ndim=1] action,
                  np.ndarray [str, ndim=1] buysell,
                  np.ndarray [str, ndim=1] seccode,
                  np.ndarray [np.float_t, ndim=1] price,
                  np.ndarray [np.int_t, ndim=1] volume):
    """Replay order-log rows into the per-instrument bid and ask books.

    The arrays are parallel columns of the order log; row ``k`` of each one
    describes the same record. ``bids`` and ``asks`` are updated in place
    and nothing is returned.

    A row is applied only when its instrument code is listed in
    ``instruments_info`` and its timestamp is strictly below that
    instrument's ``'SCHEDULE'`` cutoff. ``buysell == 'B'`` selects the bid
    book, anything else the ask book. ``action == 1`` adds the volume at the
    row's price; every other action value removes it.
    """
    cdef int row
    cdef dict book

    for row in range(len(timestamp)):
        code = seccode[row]
        if code in instruments_info and timestamp[row] < instruments_info[code]['SCHEDULE']:
            # Pick the side once, then decide between posting and removing.
            book = bids if buysell[row] == 'B' else asks
            if action[row] == 1:
                add_offer(book, code, price[row], volume[row])
            else:
                delete_offer(book, code, price[row], volume[row])

In [45]:
# %%time
import pandas as pd
import shutil
import os


def f() -> None:
    """Build the end-of-session order books and dump them to text files.

    For every month in ``months`` the folder
    ``../results/orderbooks/2018-<month>`` is deleted (if present) and
    recreated. Each selected ``OrderLog2018*`` file is read from
    ``../data/2018-<month>``, replayed through ``row_scanner``, and one file
    per instrument is written to
    ``../results/orderbooks/2018-<month>/<day>/<instrument>.txt``: the ask
    levels, a separator line, then the bid levels, each side sorted by
    descending price.
    """
    # Instruments to build books for. row_scanner indexes `bids`/`asks` by
    # these codes, so they have to match the table in the Cython cell.
    instruments_info = {'USD000000TOD': {'SCHEDULE': 174500000000},
                        'USD000UTSTOM': {'SCHEDULE': 235000000000},
                        'EUR_RUB__TOD': {'SCHEDULE': 150000000000},
                        'EUR_RUB__TOM': {'SCHEDULE': 235000000000},
                        'EURUSD000TOM': {'SCHEDULE': 235000000000},
                        'EURUSD000TOD': {'SCHEDULE': 150000000000}}
    months = ["03"]  # , "04", "05"]

    for month in months:
        month_dir = f"../results/orderbooks/2018-{month}"
        if os.path.exists(month_dir):
            shutil.rmtree(month_dir)
        # makedirs (not mkdir) so a missing ../results/orderbooks is created.
        os.makedirs(month_dir)

        folder_with_data = f"../data/2018-{month}"

        # Currently limited to a single day; iterate over
        # os.listdir(folder_with_data) instead to process the whole month.
        for file in ['OrderLog20180301.txt']:
            if not file.startswith("OrderLog2018"):
                continue

            # Read the file the loop is actually on, not a fixed path.
            orderlog = pd.read_csv(os.path.join(folder_with_data, file),
                                   index_col='NO',
                                   usecols=['NO',
                                            'SECCODE',
                                            'BUYSELL',
                                            'TIME',
                                            'ACTION',
                                            'PRICE',
                                            'VOLUME'],
                                   engine='c')

            # One empty {price: volume} book per instrument and side;
            # row_scanner fills them in place.
            bids = {instrument: {} for instrument in instruments_info}
            asks = {instrument: {} for instrument in instruments_info}

            row_scanner(bids,
                        asks,
                        orderlog.TIME.values,
                        orderlog.ACTION.values,
                        orderlog.BUYSELL.values,
                        orderlog.SECCODE.values,
                        orderlog.PRICE.values,
                        orderlog.VOLUME.values)

            # 'OrderLogYYYYMMDD.txt' -> 'DD'
            day = file[-6:-4]
            day_dir = f"../results/orderbooks/2018-{month}/{day}"
            if os.path.exists(day_dir):
                shutil.rmtree(day_dir)
            os.makedirs(day_dir)

            for instrument in instruments_info:
                filename = f"{instrument}.txt"
                ask_book = asks[instrument]
                bid_book = bids[instrument]
                # TODO: change output file format to csv
                with open(f'{day_dir}/{filename}', 'wt') as out:
                    out.writelines(
                        "A".ljust(5) + f"{level}".ljust(10) + f"{ask_book[level]}\n"
                        for level in sorted(ask_book, reverse=True))

                    out.write("\n=================================\n\n")

                    out.writelines(
                        "B".ljust(5) + f"{level}".ljust(10) + f"{bid_book[level]}\n"
                        for level in sorted(bid_book, reverse=True))

            # Drop the day's data before the next (large) log is loaded.
            del orderlog
            del bids
            del asks

In [47]:
# %prun f()

### Check whether orderbooks have collisions
max(bid) > min(ask)

### Result
6 orderbooks for each of the 64 days, and the number of books with collisions