# continuously download

In [1]:
import ccxt
exchange = getattr(ccxt, 'gdax') ()
symbol = 'BTC/USD'
interval = '1m'

In [2]:
# get data
import time
def get_ohlcv(exchange, interval, symbol):
    if exchange.has['fetchOHLCV']:
        time.sleep(exchange.rateLimit / 1000) # time.sleep wants seconds
        gdaxohlcv = exchange.fetch_ohlcv (symbol, interval) # 1 minute intervals
        return np.array(gdaxohlcv)

In [3]:
# get order book and make statistics on it
import pandas as pd
import numpy as np
from datetime import datetime

def get_orderbook_now(exchange, symbol):
    ob = exchange.fetch_order_book(symbol)
#     print(ob)
    ob_asks = np.array(ob['asks'])
    ob_bids = np.array(ob['bids'])
    ob_stats = {}
    ob_stats['time'] = datetime.now().timestamp()
    ob_stats['ask_vol'] = np.sum(ob_asks[:, 1])
    ob_stats['ask_stdovermean_price'] = np.std(ob_asks[:, 0])/np.mean(ob_asks[:, 0])
    ob_stats['ask_spread_price'] = ob_asks[-1, 0] - ob_asks[0, 0]
    ob_stats['ask_closest_price'] = ob_asks[0, 0]
    ob_stats['ask_closest_vol'] = ob_asks[0, 1]
    ob_stats['ask_weighted_mean_price'] = np.sum((ob_asks[:, 0] * ob_asks[:, 1])/np.sum(ob_asks[:, 1]))
    ob_stats['ask_closestpromille_vol'] = np.sum(ob_asks[np.argwhere(ob_asks[:, 0]>.999*ob_asks[0, 0]), 1])
    ob_stats['bid_vol'] = np.sum(ob_bids[:, 1])
    ob_stats['bid_stdovermean_price'] = np.std(ob_bids[:, 0])/np.mean(ob_bids[:, 0])
    ob_stats['bid_spread_price'] = ob_bids[-1, 0] - ob_bids[0, 0]
    ob_stats['bid_closest_price'] = ob_bids[0, 0]
    ob_stats['bid_closest_vol'] = ob_bids[0, 1]
    ob_stats['bid_weighted_mean_price'] = np.sum((ob_bids[:, 0] * ob_bids[:, 1])/np.sum(ob_bids[:, 1]))
    ob_stats['bid_closestpromille_vol'] = np.sum(ob_bids[np.argwhere(ob_bids[:, 0]>.999*ob_bids[0, 0]), 1])
    return ob_stats

columns = ['time',
           'ask_vol', 'ask_spread_price', 'ask_closest_price', 'ask_closest_vol', 'ask_weighted_mean_price',
           'bid_vol', 'bid_spread_price', 'bid_closest_price', 'bid_closest_vol', 'bid_weighted_mean_price']
ob = pd.DataFrame(columns=columns)
ob_stats = get_orderbook_now(exchange, symbol)
ob = ob.append(ob_stats, ignore_index=True)
time.sleep(5)
ob = ob.append(get_orderbook_now(exchange, symbol), ignore_index=True)

print(ob)

           time    ask_vol  ask_spread_price  ask_closest_price  \
0  1.523222e+09  39.204682             17.54            6962.23   
1  1.523222e+09  39.758484             17.54            6962.23   

   ask_closest_vol  ask_weighted_mean_price    bid_vol  bid_spread_price  \
0         0.926554              6975.533599  54.082134            -10.21   
1         0.954554              6975.312726  53.134606            -10.22   

   bid_closest_price  bid_closest_vol  bid_weighted_mean_price  \
0            6962.22        14.064024              6957.307769   
1            6962.22        13.211480              6957.245996   

   ask_closestpromille_vol  ask_stdovermean_price  bid_closestpromille_vol  \
0                39.204682               0.000735                36.439545   
1                39.758484               0.000742                35.476973   

   bid_stdovermean_price  
0               0.000407  
1               0.000421  


In [None]:
# main loop to download order book data
import time
from datetime import datetime
import ccxt

interval = 60  # seconds

gdax = getattr(ccxt, 'gdax') ()

ob_gdax = pd.DataFrame(columns=columns)
i=0
currweek = ''
while True:
    if currweek != datetime.now().strftime('%U'):  # empty dataset after a week
        ob_gdax = pd.DataFrame(columns=columns)
        currweek = datetime.now().strftime('%U')
    try:
        ob_dict = get_orderbook_now(gdax, symbol)
        ob_gdax = ob_gdax.append(ob_dict, ignore_index=True)
        ob_gdax.to_excel('gdax_orderbook_'+datetime.now().strftime('%Y-%U')+'.xlsx')
    except Exception as e:
        print('timeout, going on')
        print(e)
    print('loop {:d}:'.format(i), 'time:', datetime.now())
    print('gdax:', len(ob_gdax), 'entries') 
    i+=1
    time.sleep(interval) # one minute interval

timeout, going on
No module named 'openpyxl'
loop 0: time: 2018-04-08 23:11:35.519619
gdax: 1 entries
timeout, going on
No module named 'openpyxl'
loop 1: time: 2018-04-08 23:12:36.124385
gdax: 2 entries


In [3]:
# concatenate data
def concat(old_data, add_data):
    """input: 2d ndarray
    newest first
    """
    if old_data.size == 0:
        return add_data
    if add_data.size == 0:
        return old_data
    overlap = np.argwhere(old_data[:, 0] == add_data[-1, 0])
    if overlap.size == 0:
        overlap = 0
    else:
        overlap = overlap[0][0]
    old_data = old_data[overlap+1:, :]
    new_data = np.vstack((add_data, old_data))
    return new_data


In [4]:
# import & export csv
import csv
def save_data(array, name):
    np.savetxt(name, array, delimiter=',')

def load_data(name):
    return np.genfromtxt(name, delimiter=',')

In [None]:
# main loop
import time
from datetime import datetime
import ccxt

gdax = getattr(ccxt, 'gdax') ()
bitfinex = getattr(ccxt, 'bitfinex') ()

all_data = np.array([[]])
all_data = load_data('gdax_data.csv')
all_data_bf = np.array([[]])
all_data_bf = load_data('bitfinex_data.csv')
i=0
while True:
    try:
        recent_data = get_ohlcv(gdax, interval, symbol)
        all_data = concat(all_data, recent_data)
        save_data(all_data, 'gdax_data.csv')
    except Exception as e:
        print('timeout, going on')
        print(e)
    print('loop {:d}:'.format(i), 'time:', datetime.now())
    print('gdax:', len(all_data), 'entries') 
#     try:
#         recent_data_bf = get_ohlcv(bitfinex, interval, symbol)
#         all_data_bf = concat(all_data_bf, recent_data_bf)
#         save_data(all_data_bf, 'bitfinex_data.csv')
#     except Exception as e:
#         print('timeout, going on')
#         print(e)
#     print('bitfinex:', len(all_data_bf), 'entries')
    i+=1
    time.sleep(5 * 60) # every half an hour
    

loop 0: time: 2018-04-04 10:47:37.729051
gdax: 4707 entries
