In [1]:
import keras
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

Using TensorFlow backend.


Data available: snapshot, ticker and trades

Snapshot (100ms and 10s): gives the state of the order book at the specified time interval. Includes 100 non-zero price levels (50 ask and 50 bid) and their quantities.

Ticker: gives the bid and ask price at each timestamp for which a trade has been made

Trades: gives the time, side, price and quantity of every trade

In [181]:
# Load one example file of each data type: an hourly 100ms order book snapshot,
# the ticker for the day and the trades for the day.
# Paths are assembled with os.path.join so the separators match the host OS;
# the original backslash-only literals resolve on Windows only.
DATA_ROOT = os.path.join("Data", "coinbase_btc_usd", "coinbase", "btc_usd")

df_snapshot = pd.read_parquet(
    os.path.join(
        DATA_ROOT,
        "l2_snapshots",
        "100ms",
        "coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_0000_0100.parquet",
    )
)
df_ticker = pd.read_parquet(
    os.path.join(DATA_ROOT, "ticker", "coinbase_btc_usd_ticker_2019_11_12.parquet")
)
df_trades = pd.read_parquet(
    os.path.join(DATA_ROOT, "trades", "coinbase_btc_usd_trades_2019_11_12.parquet")
)

In [171]:
# Preview the first rows of the snapshot frame to inspect its column layout.
df_snapshot.head()

Unnamed: 0_level_0,b1,b2,b3,b4,b5,b6,b7,b8,b9,b10,...,aq41,aq42,aq43,aq44,aq45,aq46,aq47,aq48,aq49,aq50
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-11-12 00:00:00.000,8721.53,8720.59,8719.55,8719.5,8719.0,8718.02,8717.87,8717.85,8717.56,8716.06,...,8.2,2.203,2.6,0.005,2.5,0.896321,0.766,0.001737,1.82,0.933419
2019-11-12 00:00:00.100,8721.53,8720.59,8719.56,8719.55,8719.0,8718.02,8717.87,8717.85,8717.56,8716.06,...,8.2,2.203,2.6,0.005,2.5,0.896321,0.766,0.001737,1.82,0.933419
2019-11-12 00:00:00.200,8721.53,8720.59,8719.61,8719.56,8719.0,8718.02,8717.87,8717.85,8716.06,8716.0,...,0.009135,8.2,2.203,2.6,0.005,2.5,0.896321,0.766,0.001737,1.82
2019-11-12 00:00:00.300,8721.53,8720.59,8719.61,8719.56,8719.0,8718.02,8718.0,8717.87,8717.85,8716.6,...,0.009135,8.2,2.203,2.6,0.005,2.5,0.896321,0.766,0.001737,1.82
2019-11-12 00:00:00.400,8721.53,8720.59,8719.61,8719.56,8719.0,8718.99,8718.02,8718.0,8717.87,8717.85,...,0.009135,8.2,2.203,2.6,0.005,2.5,0.896321,0.766,0.001737,1.82


# Example Bid Price, Moving Up

Neural network modelling $p(Y = y_1 \mid Y \geq y_1, X = x)$

There can be zero-quantity price levels both within the spread and on the bid and ask sides. Furthermore, many changes can happen within a single 100ms snapshot window. I thus see two ways to calculate the price level change.

#### 1.
As prices are quoted to an accuracy of 0.01, one can simply take the difference in the best bid price and multiply it by 100 to express the change as the number of $0.01$ levels it has increased by.

#### 2.
Or one can approximate the price level change by counting against the non-zero levels currently present in the order book, taking the level closest to the new price.
E.g. if the bid price moves up in the next step to 5.20 and at the current time step there are ask price levels at 5.15, 5.18, 5.22, ... then this counts as a price level increase of 2 levels. This seems somewhat more arbitrary and heavily dependent on the current state of the order book.

Each snapshot file contains one hour of data at 100ms intervals. We will combine all this data into four separate dataframes (only one is built here, to demonstrate the principle): one for where the bid price has increased, one for where it has decreased, and the same two for the ask price. Thus, for the example case of the bid price increasing:

In [203]:
# Combine every hourly 100ms snapshot file into a single frame `df` that holds
# only the rows after which the best bid price (`b1`) moved up, together with
# the size of that move in 0.01 price levels (`bid_change_n`).
SNAPSHOT_DIR = os.path.join(
    "Data", "coinbase_btc_usd", "coinbase", "btc_usd", "l2_snapshots", "100ms"
)

# Per-file results are collected here and concatenated once after the loop:
# DataFrame.append was removed in pandas 2.0 and re-copied the accumulated
# frame on every call.
bid_up_frames = []

# os.listdir returns names in arbitrary order; sorting makes the run
# deterministic (the file names embed the date and hour).
for filename in sorted(os.listdir(SNAPSHOT_DIR)):
    if not filename.endswith(".parquet"):
        continue  # skip anything in the folder that is not a snapshot file
    print(filename)

    # Drop rows with missing values (new frame, no in-place mutation).
    # NOTE(review): rows are dropped before the shift below, so "next snapshot"
    # means the next surviving row, which may be more than 100ms later.
    df_snapshot = pd.read_parquet(os.path.join(SNAPSHOT_DIR, filename)).dropna()
    print('Non na entries:', np.shape(df_snapshot)[0])

    # Check no price levels with zero quantities:
    assert np.count_nonzero(df_snapshot.values) == df_snapshot.size

    # Best bid of the following row, and the move expressed in 0.01 levels.
    # Vectorised instead of a row-wise apply; rounded because 100 * (a - b) on
    # floats carries noise and the level count is a whole number.
    future_bid = df_snapshot['b1'].shift(-1)
    df_snapshot = df_snapshot.assign(
        future_bid=future_bid,
        bid_change_n=(100 * (future_bid - df_snapshot['b1'])).round(),
    )

    # Delete the last row, as its future bid is unknown. Positional slicing is
    # used so an empty frame or a repeated index label cannot misbehave.
    df_snapshot = df_snapshot.iloc[:-1]

    # Ignore the first 100 columns (the price columns).
    df_snapshot_cut = df_snapshot[df_snapshot.columns[100:]]

    # Take only the rows where the bid price increases.
    df_bid_up = df_snapshot_cut[df_snapshot_cut['bid_change_n'] > 0].drop(['future_bid'], axis=1)

    # How many entries in this hour are left?
    print('Entries left:', np.shape(df_bid_up)[0])

    bid_up_frames.append(df_bid_up)

if not bid_up_frames:
    raise FileNotFoundError("No .parquet snapshot files found in " + SNAPSHOT_DIR)

df = pd.concat(bid_up_frames)


coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_0000_0100.parquet
Non na entries: 28547
Entries left: 3589
coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_0100_0200.parquet
Non na entries: 29365
Entries left: 4635
coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_0200_0300.parquet
Non na entries: 28656
Entries left: 3228
coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_0300_0400.parquet
Non na entries: 30871
Entries left: 4663
coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_0400_0500.parquet
Non na entries: 29187
Entries left: 3622
coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_0500_0600.parquet
Non na entries: 31956
Entries left: 5930
coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_0600_0700.parquet
Non na entries: 23169
Entries left: 3822
coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_0700_0800.parquet
Non na entries: 22659
Entries left: 1894
coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_12_0800_0900.parquet
Non na entries: 19565
En

Non na entries: 27521
Entries left: 1087
coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_0100_0200.parquet
Non na entries: 24930
Entries left: 2105
coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_0200_0300.parquet
Non na entries: 23995
Entries left: 3622
coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_0300_0400.parquet
Non na entries: 23348
Entries left: 3409
coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_0400_0500.parquet
Non na entries: 23337
Entries left: 3592
coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_0500_0600.parquet
Non na entries: 20327
Entries left: 1931
coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_0600_0700.parquet
Non na entries: 19324
Entries left: 2648
coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_0700_0800.parquet
Non na entries: 19165
Entries left: 1062
coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_0800_0900.parquet
Non na entries: 19953
Entries left: 2897
coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_15_0

Non na entries: 18296
Entries left: 485
coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_0200_0300.parquet
Non na entries: 22012
Entries left: 770
coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_0300_0400.parquet
Non na entries: 22133
Entries left: 1118
coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_0400_0500.parquet
Non na entries: 21448
Entries left: 330
coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_0500_0600.parquet
Non na entries: 19851
Entries left: 579
coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_0600_0700.parquet
Non na entries: 19670
Entries left: 521
coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_0700_0800.parquet
Non na entries: 18773
Entries left: 302
coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_0800_0900.parquet
Non na entries: 19211
Entries left: 741
coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_0900_1000.parquet
Non na entries: 15352
Entries left: 137
coinbase_btc_usd_l2_book_snapshots_depth50_2019_11_18_1000_1100

In [205]:
# Persist the combined bid-up frame so later steps can reuse it without
# re-reading every snapshot file.
bid_up_csv_path = "df_snapshot_100ms_bid_up.csv"
df.to_csv(bid_up_csv_path)

In [21]:
# Tally how often each whole-number bid move (in 0.01 price levels) occurs.
# NOTE(review): df_bid_up is reassigned on every pass of the snapshot loop, so
# after that loop it holds only the last file's rows, while `df` holds the
# combined data -- confirm which one was intended here.
bid_moves = df_bid_up['bid_change_n'].values
bid_moves_int = np.round(bid_moves).astype(int)
value_counts = np.unique(bid_moves_int, return_counts=True)
value_counts

(array([   1,    2,    3,    4,    5,    6,    7,    8,    9,   10,   11,
          12,   13,   14,   15,   16,   17,   18,   19,   20,   21,   22,
          23,   24,   25,   26,   27,   28,   29,   30,   31,   32,   33,
          34,   35,   36,   37,   38,   39,   40,   41,   42,   43,   44,
          45,   46,   47,   48,   49,   50,   51,   52,   53,   54,   55,
          56,   57,   58,   59,   60,   61,   62,   63,   64,   65,   66,
          67,   68,   69,   70,   71,   72,   73,   74,   75,   76,   77,
          78,   79,   80,   81,   82,   83,   84,   85,   86,   87,   88,
          89,   90,   91,   92,   93,   94,   95,   96,   97,   98,   99,
         100,  101,  102,  103,  104,  105,  106,  107,  108,  109,  110,
         111,  112,  113,  114,  115,  116,  117,  118,  119,  120,  121,
         122,  123,  124,  125,  126,  127,  128,  129,  130,  131,  132,
         133,  134,  135,  136,  137,  138,  139,  140,  141,  142,  143,
         144,  145,  146,  147,  148, 