In [None]:
#Load in data and initialize
import pandas as pd
from sortedcontainers import SortedDict
from itertools import islice
import csv

#Base name (no ".csv" extension) of the input file -- edit this to point at your data <---------------------
input_filename = 'res_20190614'

#The order events are read from "<input_filename>.csv"
input_df = pd.read_csv(input_filename + ".csv")

#Per-level columns: price then quantity for levels 0-4, all bid levels first and then all ask levels
_level_columns = [
    f"{book_side}{field}{depth}"
    for book_side in "ba"
    for depth in range(5)
    for field in "pq"
]
output_columns = [
    'timestamp', 'price', 'side',
    *_level_columns,
    'spread', 'mid', 'swmid', 'top_five_bid_depth', 'top_five_ask_depth',
]

#order_book is keyed by order id; bid_book/ask_book are keyed by price and kept sorted in ascending price order
order_book = dict()
bid_book, ask_book = SortedDict(), SortedDict()


In [None]:
orders_df = input_df.copy()


def _add_quantity(book, level_price, level_quantity):
    """Add level_quantity to the price level, creating the level if it is missing."""
    if level_price in book:
        book[level_price] += level_quantity
    else:
        book[level_price] = level_quantity


def _remove_quantity(book, level_price, level_quantity):
    """Subtract level_quantity from the price level and delete the level when it reaches 0."""
    remaining = book[level_price] - level_quantity
    if remaining == 0:
        del book[level_price]
    else:
        book[level_price] = remaining


#Start every output field at 0 (side starts as None)
timestamp = price = 0
side = None
bp0 = bq0 = bp1 = bq1 = bp2 = bq2 = bp3 = bq3 = bp4 = bq4 = 0
ap0 = aq0 = ap1 = aq1 = ap2 = aq2 = ap3 = aq3 = ap4 = aq4 = 0
spread = mid = swmid = top_five_bid_depth = top_five_ask_depth = 0

#Placeholder (price, quantity) used when a side has fewer than 5 levels
_empty_level = (0, 0)

#Create the output csv and write the header row first
with open(f"{input_filename}_output.csv", 'w', newline='') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(output_columns)

    #One output row is written per input order event
    for order in orders_df.itertuples():
        timestamp, side, action = order.timestamp, order.side, order.action
        order_id, price, quantity = order.id, order.price, order.quantity

        #Apply the event to the order book and to the per-price bid/ask books

        if action == 'a':
            #Add: record the new order, then put its quantity on its price level
            order_book[order_id] = (side, price, quantity)
            _add_quantity(bid_book if side == 'b' else ask_book, price, quantity)

        elif action == 'm':
            #Modify: remember the old order, store the new one, then move the quantity
            old_side, old_price, old_quantity = order_book[order_id]
            order_book[order_id] = (side, price, quantity)
            _remove_quantity(bid_book if old_side == 'b' else ask_book, old_price, old_quantity)
            _add_quantity(bid_book if side == 'b' else ask_book, price, quantity)

        elif action == 'd':
            #Delete: forget the order and take its quantity off its price level
            old_side, old_price, old_quantity = order_book[order_id]
            del order_book[order_id]
            _remove_quantity(bid_book if old_side == 'b' else ask_book, old_price, old_quantity)

        #Top 5 levels per side: the books sort by ascending price, so the best bids
        #come from the reversed bid book and the best asks from the front of the ask book
        bid_levels = list(islice(reversed(bid_book.items()), 5))
        ask_levels = list(islice(ask_book.items(), 5))

        #Pad each side with (0, 0) up to 5 levels, then unpack prices and quantities
        padded_bids = bid_levels + [_empty_level] * (5 - len(bid_levels))
        padded_asks = ask_levels + [_empty_level] * (5 - len(ask_levels))
        (bp0, bq0), (bp1, bq1), (bp2, bq2), (bp3, bq3), (bp4, bq4) = padded_bids
        (ap0, aq0), (ap1, aq1), (ap2, aq2), (ap3, aq3), (ap4, aq4) = padded_asks

        #Derived fields; spread and mid are 0 unless both sides have a positive best price
        if ap0 > 0 and bp0 > 0:
            spread = ap0 - bp0
            mid = (ap0 + bp0) / 2
        else:
            spread = 0
            mid = 0

        #NOTE(review): each best price is weighted by its own side's quantity here;
        #confirm this is the intended definition of the size-weighted mid
        if ap0 > 0 and bp0 > 0 and aq0 > 0 and bq0 > 0:
            swmid = (ap0 * aq0 + bp0 * bq0) / (aq0 + bq0)
        else:
            swmid = 0

        top_five_bid_depth = bq0 + bq1 + bq2 + bq3 + bq4
        top_five_ask_depth = aq0 + aq1 + aq2 + aq3 + aq4

        #Write the row in the same order as output_columns
        writer.writerow([
            timestamp, price, side,
            bp0, bq0, bp1, bq1, bp2, bq2, bp3, bq3, bp4, bq4,
            ap0, aq0, ap1, aq1, ap2, aq2, ap3, aq3, ap4, aq4,
            spread, mid, swmid, top_five_bid_depth, top_five_ask_depth,
        ])
        
        
