In [1]:
import numpy as np
import pandas as pd
from glob import glob
import gzip
import shutil
from tqdm import tqdm
import os
import matplotlib.pyplot as plt
import math

# Preprocessing

In [2]:
# One row per instrument: (ticker, SCHEDULE, PRICE_STEP).
# NOTE(review): SCHEDULE looks like an HHMMSS + microseconds timestamp
# (e.g. 17:45:00.000000) - confirm against the data provider's documentation.
_INSTRUMENT_TABLE = (
    ('USD000000TOD', 174500000000, 0.0025),
    ('USD000UTSTOM', 235000000000, 0.0025),
    ('EUR_RUB__TOD', 150000000000, 0.0025),
    ('EUR_RUB__TOM', 235000000000, 0.0025),
    ('EURUSD000TOM', 235000000000, 0.00001),
    ('EURUSD000TOD', 150000000000, 0.00001),
)

# Tickers in table order.
instruments = [ticker for ticker, _schedule, _step in _INSTRUMENT_TABLE]

# Per-ticker settings keyed by ticker.
instruments_info = {
    ticker: {'SCHEDULE': schedule, 'PRICE_STEP': step}
    for ticker, schedule, step in _INSTRUMENT_TABLE
}

# Generate Spectrum and Orderbooks

In [3]:
class Spectrum:
    """Price-ladder bookkeeping for one instrument, bucketed by ``price_step``.

    Three views are maintained for each side of the book:

    * ``all_*_bins`` / ``all_*_volumes`` - the full ladder, one entry per price
      step. Index 0 is the most aggressive price seen so far (highest buy,
      lowest sell); further indices move away from it one step at a time.
    * ``*_bins`` / ``*_volumes`` - a ``DEPTH``-level window that starts at the
      current best price (``best_buy`` / ``best_sell``).
    * ``spectrum_*_bins`` / ``spectrum_*`` - the window aggregated into bands
      of ``BAND`` consecutive levels.

    ``best_buy`` / ``best_sell`` are ``-1`` until the first order of that side
    arrives.

    Prices are expected to lie on the ``price_step`` grid; all distances are
    converted to whole steps with float noise rounded away, and an off-grid
    price is bucketed by the ceiling of its step distance.
    """

    DEPTH = 50  # levels in the best-price window
    BAND = 5    # window levels aggregated into one spectrum band

    def __init__(self, price_step):
        """Create an empty ladder whose bins are ``price_step`` apart."""
        self.price_step = price_step
        self.buy_bins = []
        self.all_buy_bins = []
        self.all_buy_volumes = []
        self.all_sell_bins = []
        self.all_sell_volumes = []
        self.sell_bins = []
        self.best_sell = -1
        self.best_buy = -1
        self.buy_volumes = [0] * self.DEPTH
        self.sell_volumes = [0] * self.DEPTH
        self.spectrum_sell_bins = []
        self.spectrum_buy_bins = []
        self.spectrum_sell = [0] * (self.DEPTH // self.BAND)
        self.spectrum_buy = [0] * (self.DEPTH // self.BAND)

    # ------------------------------------------------------------------
    # private helpers (``sign`` is +1 for the sell side, -1 for the buy side)
    # ------------------------------------------------------------------

    def _steps(self, delta):
        """Return ``delta`` as a whole number of price steps (ceiling).

        The quotient is rounded to 6 decimals first so that binary float noise
        (e.g. 3.0000000000001) cannot push the ceiling one step too far.
        """
        return int(math.ceil(round(delta / self.price_step, 6)))

    def _insert(self, bins, volumes, price, volume, sign):
        """Add ``volume`` at ``price`` to a full ladder, growing it if needed.

        Returns ``(bins, volumes, idx)`` where ``idx`` is the ladder index that
        received the volume.
        """
        idx = self._steps(sign * (price - bins[0]))
        if idx < 0:
            # More aggressive than anything recorded: prepend one bin per step.
            bins = [price + sign * k * self.price_step for k in range(-idx)] + bins
            volumes = [0] * (len(bins) - len(volumes)) + volumes
            idx = 0
        elif idx >= len(bins):
            # Deeper than the ladder reaches: append exactly the missing bins.
            tail = bins[-1]
            missing = idx - len(bins) + 1
            bins = bins + [tail + sign * k * self.price_step for k in range(1, missing + 1)]
            volumes = volumes + [0] * (len(bins) - len(volumes))
        volumes[idx] += volume
        return bins, volumes, idx

    def _remove(self, bins, volumes, price, volume, sign):
        """Remove ``volume`` at ``price`` from a full ladder.

        Returns the index of the level if it still holds volume; otherwise the
        index of the next non-empty level (or the last index when none is
        left). Returns -1 when ``price`` is outside the ladder.
        """
        idx = self._steps(sign * (price - bins[0]))
        if not 0 <= idx < len(bins):
            return -1
        if volume < volumes[idx]:
            volumes[idx] -= volume
            return idx
        volumes[idx] = 0
        last = len(volumes) - 1
        while idx < last and volumes[idx] == 0:
            idx += 1
        return idx

    def _trim(self, bins, volumes, price, volume, sign):
        """Subtract ``volume`` at ``price`` from a full ladder, clamping at 0."""
        idx = self._steps(sign * (price - bins[0]))
        if 0 <= idx < len(volumes):
            volumes[idx] = max(0, volumes[idx] - volume)

    def _window(self, volumes, idx, best, sign):
        """Build the ``DEPTH``-level window starting at ladder index ``idx``."""
        bins = [best + sign * k * self.price_step for k in range(self.DEPTH)]
        vols = volumes[idx:idx + self.DEPTH]
        vols = vols + [0] * (self.DEPTH - len(vols))  # pad past the ladder end
        return bins, vols

    # ------------------------------------------------------------------
    # full-ladder operations
    # ------------------------------------------------------------------

    def insert_to_buy(self, price, volume):
        """Add ``volume`` at ``price`` to the full buy ladder; return its index."""
        self.all_buy_bins, self.all_buy_volumes, idx = self._insert(
            self.all_buy_bins, self.all_buy_volumes, price, volume, -1)
        return idx

    def insert_to_sell(self, price, volume):
        """Add ``volume`` at ``price`` to the full sell ladder; return its index."""
        self.all_sell_bins, self.all_sell_volumes, idx = self._insert(
            self.all_sell_bins, self.all_sell_volumes, price, volume, 1)
        return idx

    def delete_from_buy(self, price, volume):
        """Remove ``volume`` at ``price`` from the full buy ladder.

        Returns the index of the first level at or below ``price`` that still
        holds volume (last index if none), or -1 if ``price`` is off the ladder.
        """
        return self._remove(self.all_buy_bins, self.all_buy_volumes, price, volume, -1)

    def delete_from_sell(self, price, volume):
        """Remove ``volume`` at ``price`` from the full sell ladder.

        Returns the index of the first level at or above ``price`` that still
        holds volume (last index if none), or -1 if ``price`` is off the ladder.
        """
        return self._remove(self.all_sell_bins, self.all_sell_volumes, price, volume, 1)

    # ------------------------------------------------------------------
    # window index helpers
    # ------------------------------------------------------------------

    def find_idx_sell(self, price):
        """Window index of a sell ``price``; an index of DEPTH is clamped to DEPTH - 1."""
        k = self._steps(price - self.best_sell)
        if k == self.DEPTH:
            k = self.DEPTH - 1
        return int(k)

    def find_idx_buy(self, price):
        """Window index of a buy ``price``; an index of DEPTH is clamped to DEPTH - 1."""
        k = self._steps(self.best_buy - price)
        if k == self.DEPTH:
            k = self.DEPTH - 1
        return int(k)

    def find_idx_spectrum_sell(self, price):
        """Band index of a sell ``price``; one past the last band is clamped."""
        k = self._steps(price - self.best_sell) // self.BAND
        if k == self.DEPTH // self.BAND:
            k = self.DEPTH // self.BAND - 1
        return k

    def find_idx_spectrum_buy(self, price):
        """Band index of a buy ``price``; one past the last band is clamped."""
        k = self._steps(self.best_buy - price) // self.BAND
        if k == self.DEPTH // self.BAND:
            k = self.DEPTH // self.BAND - 1
        return k

    def recalc_spectrum_sell(self):
        """Rebuild the sell bands from the current sell window."""
        starts = range(0, self.DEPTH, self.BAND)
        self.spectrum_sell_bins = [self.sell_bins[i] for i in starts]
        self.spectrum_sell = [sum(self.sell_volumes[i:i + self.BAND]) for i in starts]

    def recalc_spectrum_buy(self):
        """Rebuild the buy bands from the current buy window."""
        starts = range(0, self.DEPTH, self.BAND)
        self.spectrum_buy_bins = [self.buy_bins[i] for i in starts]
        self.spectrum_buy = [sum(self.buy_volumes[i:i + self.BAND]) for i in starts]

    # ------------------------------------------------------------------
    # order events
    # ------------------------------------------------------------------

    def new_sell_order(self, price, volume):
        """Record a new sell order of ``volume`` at ``price``."""
        # no sell orders recorded yet
        if self.best_sell == -1:
            self.best_sell = price
            self.sell_bins = [price + k * self.price_step for k in range(self.DEPTH)]
            self.spectrum_sell_bins = self.sell_bins[::self.BAND]
            self.sell_volumes[0] = volume
            self.spectrum_sell[0] = volume
            self.all_sell_bins = self.sell_bins.copy()
            self.all_sell_volumes = self.sell_volumes.copy()
            return

        offset = self._steps(price - self.best_sell)
        if 0 <= offset < self.DEPTH:
            # falls inside the current window: update window, bands and ladder
            self.sell_volumes[offset] += volume
            self.spectrum_sell[offset // self.BAND] += volume
            self.insert_to_sell(price, volume)
        elif offset < 0:
            # new best price: re-anchor the window on it
            idx = self.insert_to_sell(price, volume)
            self.best_sell = price
            self.sell_bins, self.sell_volumes = self._window(
                self.all_sell_volumes, idx, self.best_sell, 1)
            self.recalc_spectrum_sell()
        else:
            # beyond the window: keep it in the full ladder for later use
            self.insert_to_sell(price, volume)

    def new_buy_order(self, price, volume):
        """Record a new buy order of ``volume`` at ``price``."""
        # no buy orders recorded yet
        if self.best_buy == -1:
            self.best_buy = price
            self.buy_bins = [price - k * self.price_step for k in range(self.DEPTH)]
            self.spectrum_buy_bins = self.buy_bins[::self.BAND]
            self.buy_volumes[0] = volume
            self.spectrum_buy[0] = volume
            self.all_buy_bins = self.buy_bins.copy()
            self.all_buy_volumes = self.buy_volumes.copy()
            return

        offset = self._steps(self.best_buy - price)
        if 0 <= offset < self.DEPTH:
            # falls inside the current window: update window, bands and ladder
            self.buy_volumes[offset] += volume
            self.spectrum_buy[offset // self.BAND] += volume
            self.insert_to_buy(price, volume)
        elif offset < 0:
            # new best price: re-anchor the window on it
            idx = self.insert_to_buy(price, volume)
            self.best_buy = price
            self.buy_bins, self.buy_volumes = self._window(
                self.all_buy_volumes, idx, self.best_buy, -1)
            self.recalc_spectrum_buy()
        else:
            # beyond the window: keep it in the full ladder for later use
            self.insert_to_buy(price, volume)

    def delete_sell_order(self, price, volume):
        """Remove ``volume`` at ``price`` from the sell side.

        A price more aggressive than the current best is ignored. Nothing
        happens if no sell order has been recorded yet.
        """
        if self.best_sell == -1:
            return
        offset = self._steps(price - self.best_sell)
        at_best = offset == 0
        if 0 < offset < self.DEPTH or (at_best and volume < self.sell_volumes[0]):
            # the best level survives: shrink window, band and full ladder
            self.sell_volumes[offset] = max(0, self.sell_volumes[offset] - volume)
            band = offset // self.BAND
            self.spectrum_sell[band] = max(0, self.spectrum_sell[band] - volume)
            self._trim(self.all_sell_bins, self.all_sell_volumes, price, volume, 1)
        elif at_best:
            # the best level is emptied: move to the next populated level
            idx = self.delete_from_sell(price, volume)
            if idx == -1:
                return
            self.best_sell = self.all_sell_bins[idx]
            self.sell_bins, self.sell_volumes = self._window(
                self.all_sell_volumes, idx, self.best_sell, 1)
            self.recalc_spectrum_sell()
        elif offset >= self.DEPTH:
            # outside the window: only the full ladder knows about it
            self.delete_from_sell(price, volume)

    def delete_buy_order(self, price, volume):
        """Remove ``volume`` at ``price`` from the buy side.

        A price more aggressive than the current best is ignored. Nothing
        happens if no buy order has been recorded yet.
        """
        if self.best_buy == -1:
            return
        offset = self._steps(self.best_buy - price)
        at_best = offset == 0
        if 0 < offset < self.DEPTH or (at_best and volume < self.buy_volumes[0]):
            # the best level survives: shrink window, band and full ladder
            self.buy_volumes[offset] = max(0, self.buy_volumes[offset] - volume)
            band = offset // self.BAND
            self.spectrum_buy[band] = max(0, self.spectrum_buy[band] - volume)
            self._trim(self.all_buy_bins, self.all_buy_volumes, price, volume, -1)
        elif at_best:
            # the best level is emptied: move to the next populated level
            idx = self.delete_from_buy(price, volume)
            if idx == -1:
                return
            self.best_buy = self.all_buy_bins[idx]
            self.buy_bins, self.buy_volumes = self._window(
                self.all_buy_volumes, idx, self.best_buy, -1)
            self.recalc_spectrum_buy()
        elif offset >= self.DEPTH:
            # outside the window (i.e. far BELOW the best buy): only the full
            # ladder knows about it
            self.delete_from_buy(price, volume)

In [41]:
from dateutil.parser import parse

class OrderBookSpectrum():
    """Replays order-log events into a ``Spectrum`` and logs derived series.

    For every event passed to ``new_order`` the object updates the resting
    orders (``ob_df``), the price-ladder ``spectrum`` and four newest-first
    ``[time, volume]`` lists, then appends one line to each of five text files.

    The five files are opened in ``__init__``; call ``close()`` (or use the
    object as a context manager) when finished so the output is flushed.
    """

    # Averaging horizons, in seconds, for the five values of each band list.
    BAND_HORIZONS = (1, 5, 15, 30, 60)
    # Divisor applied to every band value before it is reported.
    VOLUME_SCALE = 10 ** 7

    # def __init__(self, tradelog_labels):
    def __init__(self,
                 price_step,
                 spectrum_filename,
                 bid_volumes_filename,
                 ask_volumes_filename,
                 bid_volumes_post_filename,
                 ask_volumes_post_filename):
        """Open the five output files and create an empty book.

        Args:
            price_step: distance between adjacent price bins.
            spectrum_filename: output path for the normalized spectrum lines.
            bid_volumes_filename / ask_volumes_filename: output paths for the
                bands computed from matched volumes.
            bid_volumes_post_filename / ask_volumes_post_filename: output paths
                for the bands computed from posted volumes.
        """
        self.ob_df = {}          # orderno -> {'volume', 'buysell', 'price', 'time'}
        self.collisions = 0      # count of inconsistent events seen so far
        self.bid_volumes = []    # newest-first [time, volume] of matched sell orders
        self.ask_volumes = []    # newest-first [time, volume] of matched buy orders
        self.bid_volumes_post = []   # newest-first [time, volume] of posted sell orders
        self.ask_volumes_post = []   # newest-first [time, volume] of posted buy orders
        self.price_step = price_step
        self.spectrum = Spectrum(price_step)
        self.spectrum_file = open(spectrum_filename, "w+")
        # Start as five zeros so write_to_file can always iterate over them.
        self.current_bid_bands = [0] * len(self.BAND_HORIZONS)
        self.current_ask_bands = [0] * len(self.BAND_HORIZONS)
        self.current_bid_bands_post = [0] * len(self.BAND_HORIZONS)
        self.current_ask_bands_post = [0] * len(self.BAND_HORIZONS)
        self.bid_volumes_file = open(bid_volumes_filename, "w+")
        self.ask_volumes_file = open(ask_volumes_filename, "w+")
        self.bid_volumes_post_file = open(bid_volumes_post_filename, "w+")
        self.ask_volumes_post_file = open(ask_volumes_post_filename, "w+")

    def close(self):
        """Close all five output files (safe to call more than once)."""
        for handle in (self.spectrum_file,
                       self.bid_volumes_file,
                       self.ask_volumes_file,
                       self.bid_volumes_post_file,
                       self.ask_volumes_post_file):
            handle.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def write_to_file(self, time):
        """Append the current state to the output files.

        Nothing is written unless both the buy and the sell spectrum have a
        positive total. The spectrum line is ``time``, the sell bands and the
        buy bands, each side normalized to sum to 1.
        """
        buy_spect = np.array(self.spectrum.spectrum_buy)
        sell_spect = np.array(self.spectrum.spectrum_sell)
        if not (buy_spect.sum() > 0 and sell_spect.sum() > 0):
            return

        buy = ', '.join(str(i) for i in buy_spect / buy_spect.sum())
        sell = ', '.join(str(i) for i in sell_spect / sell_spect.sum())
        self.spectrum_file.write(str(time) + ', ' + sell + ', ' + buy + '\n')

        for handle, bands in ((self.bid_volumes_file, self.current_bid_bands),
                              (self.ask_volumes_file, self.current_ask_bands),
                              (self.bid_volumes_post_file, self.current_bid_bands_post),
                              (self.ask_volumes_post_file, self.current_ask_bands_post)):
            handle.write(str(time) + ' ' + ' '.join(str(x) for x in bands) + '\n')

    def post_order(self, orderno, volume, buysell, price, time):
        """Register a newly posted order and add it to the spectrum.

        A buy order priced within five steps of the highest buy bin is logged
        in ``ask_volumes_post``; a sell order within five steps of the lowest
        sell bin is logged in ``bid_volumes_post``.
        """
        # NOTE(review): the reference price is all_*_bins[0], the most
        # aggressive price ever recorded, not necessarily the current best;
        # and buy orders feed the "ask" list (sell -> "bid"). Confirm both
        # are intended.
        self.ob_df[orderno] = {'volume': volume, 'buysell': buysell, 'price': price, 'time': time}
        if buysell == 'B':
            self.spectrum.new_buy_order(price, volume)

            max_price = self.spectrum.all_buy_bins[0]
            if abs(price - max_price) < self.price_step * 5:
                self.ask_volumes_post.insert(0, [time, volume])
        else:
            self.spectrum.new_sell_order(price, volume)

            min_price = self.spectrum.all_sell_bins[0]
            if abs(price - min_price) < self.price_step * 5:
                self.bid_volumes_post.insert(0, [time, volume])

    def _reduce_resting(self, orderno, volume, buysell, price):
        """Take ``volume`` off resting order ``orderno`` and off the spectrum.

        The caller must have checked that ``orderno`` is in ``ob_df``.
        Returns True when applied, False when ``volume`` is not <= the
        resting volume (nothing is changed in that case).
        """
        resting = self.ob_df[orderno]['volume']
        if volume == resting:
            self.ob_df.pop(orderno, None)
        elif volume < resting:
            self.ob_df[orderno]['volume'] -= volume
        else:
            return False

        if buysell == 'B':
            self.spectrum.delete_buy_order(price, volume)
        else:
            self.spectrum.delete_sell_order(price, volume)
        return True

    def revoke_order(self, orderno, volume, buysell, price, time):
        """Cancel ``volume`` of a resting order.

        Unknown order numbers and over-sized volumes are counted as collisions
        and the order (if any) is dropped from ``ob_df``.
        """
        if orderno not in self.ob_df:
            print('\nException: orderno does not exist: \n', orderno)
            self.collisions += 1
            self.delete_collision(orderno)
            return

        if not self._reduce_resting(orderno, volume, buysell, price):
            print('\nException: not possible volume for revoke: \n', orderno)
            self.collisions += 1
            self.delete_collision(orderno)

    def match_order(self, orderno, volume, buysell, price, time):
        """Execute ``volume`` of a resting order and log the traded volume.

        A matched buy order is logged in ``ask_volumes``, a matched sell order
        in ``bid_volumes``. Unknown order numbers and over-sized volumes are
        counted as collisions and otherwise ignored.
        """
        if orderno not in self.ob_df:
            print('\nException: orderno does not exist: \n', orderno)
            self.collisions += 1
            return

        if not self._reduce_resting(orderno, volume, buysell, price):
            print('\nException: not possible volume for match: \n', orderno)
            self.collisions += 1
            return

        if buysell == 'B':
            self.ask_volumes.insert(0, [time, volume])
        else:
            self.bid_volumes.insert(0, [time, volume])

    def delete_collision(self, orderno):
        """Drop ``orderno`` from the resting orders and report the collision count."""
        # str() is required: order numbers read from the log are integers.
        print('Delete collisioned orders with ORDERNO: ' + str(orderno))
        self.ob_df.pop(orderno, None)
        print('Current number of collisions: {}\n'.format(self.collisions))

    def new_order(self, action, orderno, volume, buysell, price, time):
        """Apply one order-log event, then refresh the bands and write a line.

        ``action`` 1 posts, 0 revokes and 2 matches; any other value is
        counted as a collision.
        """
        if action == 1:
            self.post_order(orderno, volume, buysell, price, time)
        elif action == 0:
            self.revoke_order(orderno, volume, buysell, price, time)
        elif action == 2:
            self.match_order(orderno, volume, buysell, price, time)
        else:
            self.collisions += 1

        self.current_bid_bands, self.current_ask_bands = self.calculate_bands(time, self.bid_volumes, self.ask_volumes)
        self.current_bid_bands_post, self.current_ask_bands_post = self.calculate_bands(time, self.bid_volumes_post, self.ask_volumes_post)

        self.write_to_file(time)

    def _band_rates(self, time, entries):
        """Prune ``entries`` in place and return its five scaled band values.

        ``entries`` is a newest-first list of ``[time, volume]``. Entries at
        the tail that are more than 60 seconds older than ``time`` are popped.
        Each band is the cumulative volume up to the last entry whose age fits
        the horizon, divided by the horizon and by ``VOLUME_SCALE``.
        """
        longest = self.BAND_HORIZONS[-1]
        while len(entries) > 0 and longest < self.time_diff(time, entries[-1][0]):
            entries.pop()

        running = np.cumsum([info[1] for info in entries])
        rates = [0] * len(self.BAND_HORIZONS)
        if len(running):
            # The longest horizon always covers everything that survived pruning.
            rates[-1] = running[-1] / longest

        for i, info in enumerate(entries):
            age = self.time_diff(time, info[0])  # computed once per entry
            for slot, horizon in enumerate(self.BAND_HORIZONS[:-1]):
                if horizon >= age:
                    rates[slot] = running[i] / horizon

        return [rate / self.VOLUME_SCALE for rate in rates]

    def calculate_bands(self, time, bid_list, asks_list):
        """Return ``(bid_bands, ask_bands)`` for the two newest-first lists.

        Both lists are pruned in place of tail entries older than 60 seconds.
        Each result holds five values, one per horizon in ``BAND_HORIZONS``.
        """
        current_bids = self._band_rates(time, bid_list)
        current_asks = self._band_rates(time, asks_list)
        return current_bids, current_asks

    @staticmethod
    def _to_microseconds(stamp):
        """Convert an ``HHMMSS`` + fractional-digits stamp to microseconds of day.

        The first six characters of ``str(stamp)`` are read as hours, minutes
        and seconds; the remainder is the fraction of a second, used to
        microsecond precision.
        """
        # NOTE(review): a stamp earlier than 10:00:00 stored as an integer
        # loses its leading zero and would be misread - confirm the log never
        # contains such times or zero-pad upstream.
        digits = str(stamp)
        hours = int(digits[0:2])
        minutes = int(digits[2:4])
        seconds = int(digits[4:6])
        micros = int((digits[6:] + '000000')[:6])
        return ((hours * 60 + minutes) * 60 + seconds) * 10 ** 6 + micros

    def time_diff(self, time1, time2):
        """Return ``time1 - time2`` in seconds for two same-day time stamps."""
        # Integer arithmetic replaces two date-string parses per call; this
        # method runs several times per list entry for every processed order.
        return (self._to_microseconds(time1) - self._to_microseconds(time2)) / 10 ** 6

In [42]:
def generate_orderbook(orderlog_path, output_path, instrument, price_step, filename, fifth_task_fn):
    """Replay one order log through an ``OrderBookSpectrum``.

    Args:
        orderlog_path: CSV file with the columns ACTION, ORDERNO, VOLUME,
            BUYSELL, PRICE and TIME.
        output_path: currently unused (kept for interface compatibility).
        instrument: currently unused (kept for interface compatibility).
        price_step: price step handed to ``OrderBookSpectrum``.
        filename: path of the spectrum output file.
        fifth_task_fn: sequence of four output paths, passed to
            ``OrderBookSpectrum`` in order (bid volumes, ask volumes,
            bid volumes post, ask volumes post).

    Returns:
        The ``OrderBookSpectrum`` after all rows were applied. Its output
        files are already closed, so it is meant for inspection only.
    """
    df = pd.read_csv(orderlog_path)
    columns = [df[name].to_numpy()
               for name in ('ACTION', 'ORDERNO', 'VOLUME', 'BUYSELL', 'PRICE', 'TIME')]

    ob = OrderBookSpectrum(price_step, filename, fifth_task_fn[0], fifth_task_fn[1], fifth_task_fn[2], fifth_task_fn[3])

    try:
        for action, orderno, volume, buysell, price, time in zip(*columns):
            ob.new_order(action, orderno, volume, buysell, price, time)
    finally:
        # Flush and release the output files even if the replay is interrupted,
        # so partially written results are not left sitting in buffers.
        for handle in (ob.spectrum_file,
                       ob.bid_volumes_file,
                       ob.ask_volumes_file,
                       ob.bid_volumes_post_file,
                       ob.ask_volumes_post_file):
            handle.close()

    return ob

### Generate spectra for all days

In [43]:
# Instruments to process; each order-log file found for an instrument is
# treated as one trading day.
instruments_generate = ['EUR_RUB__TOD', 'EUR_RUB__TOM', 'USD000UTSTOM', 'USD000000TOD']

# The output files are opened with open(..., "w+"), which fails if the
# target directory is missing.
for out_dir in ('spectrum', 'volumes_trades', 'liquidity_files'):
    os.makedirs(out_dir, exist_ok=True)

for instrument in instruments_generate:
    # glob() returns paths in arbitrary order; sort so that the day number
    # assigned to each file is the same on every run and every machine.
    log_paths = sorted(glob('data/fx_separated/' + instrument + '/OrderLog*.txt'))
    i_pricestep = instruments_info[instrument]['PRICE_STEP']

    for day, log_path in enumerate(log_paths, start=1):
        spectrum_filename = 'spectrum/' + instrument + '_spectrum_day' + str(day) + '.txt'
        # NOTE(review): fixed date in the name; generate_orderbook does not
        # write to this path at present.
        orderbook_filename = 'orderbooks/' + instrument + '_orderbook_20180301.txt'

        volumes_trades_bids = 'volumes_trades/' + instrument + '_bids' + str(day) + '.txt'
        volumes_trades_asks = 'volumes_trades/' + instrument + '_asks' + str(day) + '.txt'

        liquidity_bids = 'liquidity_files/' + instrument + '_bids' + str(day) + '.txt'
        liquidity_asks = 'liquidity_files/' + instrument + '_asks' + str(day) + '.txt'
        print(instrument, day)

        generate_orderbook(log_path, orderbook_filename, instrument, i_pricestep, spectrum_filename,
                           [volumes_trades_bids,
                            volumes_trades_asks,
                            liquidity_bids,
                            liquidity_asks])

EUR_RUB__TOD 1

Exception: not possible volume for match: 
 170564

Exception: not possible volume for match: 
 256711

Exception: not possible volume for match: 
 549040

Exception: not possible volume for match: 
 553706

Exception: not possible volume for match: 
 554989

Exception: not possible volume for match: 
 555040

Exception: not possible volume for match: 
 561516

Exception: not possible volume for match: 
 563647


KeyboardInterrupt: 