# Continuously download GDAX order-book statistics and OHLCV candles

In [1]:
import ccxt
# Market settings and exchange client used by the cells below.
symbol = 'BTC/USD'
interval = '1m'
exchange = ccxt.gdax()

In [2]:
# get data
import time
def get_ohlcv(exchange, interval, symbol):
    """Fetch OHLCV candles for `symbol` from `exchange` as a 2-d ndarray.

    Parameters
    ----------
    exchange : object exposing `has` (dict), `rateLimit` and `fetch_ohlcv`.
    interval : timeframe passed straight through to `fetch_ohlcv` (e.g. '1m').
    symbol   : market symbol passed straight through to `fetch_ohlcv`.

    Returns
    -------
    numpy.ndarray built from whatever `fetch_ohlcv` returns. When the
    exchange reports no OHLCV support, an empty array of shape (0, 6) is
    returned instead of an implicit None, so that callers which inspect
    `.size` (such as `concat`) treat it as "nothing new" rather than crashing.
    """
    if not exchange.has['fetchOHLCV']:
        # Six columns assumed: timestamp, open, high, low, close, volume.
        return np.empty((0, 6))

    # rateLimit is divided by 1000 because time.sleep wants seconds.
    time.sleep(exchange.rateLimit / 1000)
    candles = exchange.fetch_ohlcv(symbol, interval)
    return np.array(candles)

In [None]:
# get order book and make statistics on it
import pandas as pd
import numpy as np
from datetime import datetime

def _orderbook_side_stats(levels, prefix, is_ask):
    """Summarise one side of an order book into a dict keyed '<prefix>_<stat>'.

    `levels` is a 2-d array whose column 0 is price and column 1 is volume,
    with the level closest to the spread in row 0.
    """
    prices = levels[:, 0]
    volumes = levels[:, 1]
    best_price = prices[0]
    total_volume = np.sum(volumes)

    # Levels within one promille of the best price. Asks lie *above* the best
    # ask, so the band is [best, 1.001 * best); bids lie *below* the best bid,
    # so the band is (0.999 * best, best].
    if is_ask:
        near_best = prices < 1.001 * best_price
    else:
        near_best = prices > 0.999 * best_price

    return {
        prefix + '_vol': total_volume,
        prefix + '_stdovermean_price': np.std(prices) / np.mean(prices),
        # Last level minus first level: positive for asks, negative for bids.
        prefix + '_spread_price': prices[-1] - best_price,
        prefix + '_closest_price': best_price,
        prefix + '_closest_vol': volumes[0],
        prefix + '_weighted_mean_price': np.sum(prices * volumes) / total_volume,
        prefix + '_closestpromille_vol': np.sum(volumes[near_best]),
    }


def get_orderbook_now(exchange, symbol):
    """Fetch the current order book and reduce it to summary statistics.

    Parameters
    ----------
    exchange : object exposing `fetch_order_book(symbol)` that returns a
               mapping with 'asks' and 'bids', each a list of
               [price, volume, ...] levels with the best level first.
    symbol   : market symbol passed straight through to `fetch_order_book`.

    Returns
    -------
    dict with 'time' (local POSIX timestamp of the call) followed by, for each
    of 'ask' and 'bid': total volume, std/mean of prices, price spread, best
    price and its volume, volume-weighted mean price, and the volume lying
    within one promille of the best price.
    """
    ob = exchange.fetch_order_book(symbol)
    ob_stats = {'time': datetime.now().timestamp()}
    ob_stats.update(_orderbook_side_stats(np.array(ob['asks']), 'ask', True))
    ob_stats.update(_orderbook_side_stats(np.array(ob['bids']), 'bid', False))
    return ob_stats

# Leading column order for order-book snapshot tables. Statistics returned by
# get_orderbook_now that are not listed here are kept and placed after these.
columns = ['time',
           'ask_vol', 'ask_spread_price', 'ask_closest_price', 'ask_closest_vol', 'ask_weighted_mean_price',
           'bid_vol', 'bid_spread_price', 'bid_closest_price', 'bid_closest_vol', 'bid_weighted_mean_price']

# Take two snapshots five seconds apart as a quick smoke test.
ob_stats = get_orderbook_now(exchange, symbol)
ob_snapshots = [ob_stats]
time.sleep(5)
ob_snapshots.append(get_orderbook_now(exchange, symbol))

# Build the frame once from the collected records: DataFrame.append was
# removed in pandas 2.0, and growing a frame row by row is quadratic anyway.
ob = pd.DataFrame(ob_snapshots)
ob = ob.reindex(columns=columns + [c for c in ob.columns if c not in columns])

print(ob)

In [None]:
# main loop to download order book data
import time
from datetime import datetime
import ccxt

# Seconds between order-book snapshots. Deliberately NOT named `interval`:
# that module-level name holds the OHLCV timeframe string ('1m') passed to
# get_ohlcv, and overwriting it with 60 would break the OHLCV download loop.
ob_poll_seconds = 60

gdax = ccxt.gdax()

ob_records = []                          # one stats dict per successful snapshot
ob_gdax = pd.DataFrame(columns=columns)  # tabular view of ob_records
i = 0
currweek = ''
while True:
    # Sample the clock once per iteration so the weekly reset and the output
    # file name can never disagree across a week boundary.
    now = datetime.now()
    week = now.strftime('%U')
    if currweek != week:  # empty dataset after a week
        ob_records = []
        ob_gdax = pd.DataFrame(columns=columns)
        currweek = week
    try:
        ob_records.append(get_orderbook_now(gdax, symbol))
        # Rebuild from the record list instead of DataFrame.append (removed in
        # pandas 2.0). Listed columns come first, any extra stats after them.
        ob_gdax = pd.DataFrame(ob_records)
        ob_gdax = ob_gdax.reindex(
            columns=columns + [c for c in ob_gdax.columns if c not in columns])
        ob_gdax.to_excel('gdax_orderbook_' + now.strftime('%Y-%U') + '.xlsx')
    except Exception as e:
        # Best effort: any failure (network, exchange, file write) is reported
        # and the loop keeps polling.
        print('timeout, going on')
        print(e)
    print('loop {:d}:'.format(i), 'time:', datetime.now())
    print('gdax:', len(ob_gdax), 'entries')
    i += 1
    time.sleep(ob_poll_seconds)  # one minute interval

In [3]:
# concatenate data
def concat(old_data, add_data):
    """Merge freshly downloaded rows into the stored history.

    Both inputs are 2-d ndarrays ordered newest first, with the timestamp in
    column 0. The result is `add_data` followed by every row of `old_data`
    that is strictly older than the oldest row of `add_data`, so:

    * overlapping rows are taken from `add_data` (the fresher copy);
    * if there is a gap between the two (no shared timestamp), all of
      `old_data` is kept -- previously its newest row was silently dropped;
    * if `add_data` already reaches further back than `old_data`, no stale
      duplicates from `old_data` are appended.

    If either input is empty (`size == 0`) the other one is returned as is.
    """
    if old_data.size == 0:
        return add_data
    if add_data.size == 0:
        return old_data

    # A single stored/downloaded row may arrive as a 1-d array; treat it as one row.
    old_data = np.atleast_2d(old_data)
    add_data = np.atleast_2d(add_data)

    oldest_new_time = add_data[-1, 0]
    still_needed = old_data[old_data[:, 0] < oldest_new_time]
    return np.vstack((add_data, still_needed))


In [4]:
# import & export csv
import csv
def save_data(array, name):
    """Write `array` to the file `name` as comma-separated text."""
    np.savetxt(fname=name, X=array, delimiter=',')

def load_data(name):
    """Parse the comma-separated file `name` with np.genfromtxt and return the result."""
    loaded = np.genfromtxt(fname=name, delimiter=',')
    return loaded

In [None]:
# main loop
import time
from datetime import datetime
import ccxt
import numpy as np

gdax = ccxt.gdax()
bitfinex = ccxt.bitfinex()


def _load_or_empty(path):
    """Return the history stored at `path`, or an empty 2-d array if it cannot be opened."""
    try:
        return load_data(path)
    except OSError:
        # First run: no file yet. concat() treats a size-0 array as "no history".
        return np.array([[]])


all_data = _load_or_empty('gdax_data.csv')
all_data_bf = _load_or_empty('bitfinex_data.csv')
i = 0
while True:
    try:
        recent_data = get_ohlcv(gdax, interval, symbol)
        all_data = concat(all_data, recent_data)
        save_data(all_data, 'gdax_data.csv')
    except Exception as e:
        # Best effort: report the failure and try again on the next cycle.
        print('timeout, going on')
        print(e)
    print('loop {:d}:'.format(i), 'time:', datetime.now())
    print('gdax:', len(all_data), 'entries')
#     try:
#         recent_data_bf = get_ohlcv(bitfinex, interval, symbol)
#         all_data_bf = concat(all_data_bf, recent_data_bf)
#         save_data(all_data_bf, 'bitfinex_data.csv')
#     except Exception as e:
#         print('timeout, going on')
#         print(e)
#     print('bitfinex:', len(all_data_bf), 'entries')
    i += 1
    time.sleep(5 * 60)  # every five minutes
    

loop 0: time: 2018-04-09 08:29:24.479678
gdax: 11768 entries
loop 1: time: 2018-04-09 08:34:26.479335
gdax: 11774 entries
loop 2: time: 2018-04-09 08:39:28.476913
gdax: 11778 entries
loop 3: time: 2018-04-09 08:44:30.316563
gdax: 11783 entries
loop 4: time: 2018-04-09 08:49:32.218821
gdax: 11789 entries
loop 5: time: 2018-04-09 08:54:34.107176
gdax: 11793 entries
loop 6: time: 2018-04-09 08:59:35.984999
gdax: 11799 entries
loop 7: time: 2018-04-09 09:04:37.975675
gdax: 11803 entries
loop 8: time: 2018-04-09 09:09:39.938435
gdax: 11809 entries
loop 9: time: 2018-04-09 09:14:41.812618
gdax: 11813 entries
loop 10: time: 2018-04-09 09:19:43.789529
gdax: 11819 entries
loop 11: time: 2018-04-09 09:24:45.761822
gdax: 11824 entries
loop 12: time: 2018-04-09 09:29:47.661825
gdax: 11829 entries
loop 13: time: 2018-04-09 09:34:49.195941
gdax: 11834 entries
loop 14: time: 2018-04-09 09:39:51.194734
gdax: 11839 entries
loop 15: time: 2018-04-09 09:44:53.138496
gdax: 11843 entries
loop 16: time: 201

loop 130: time: 2018-04-09 19:23:31.046173
gdax: 12423 entries
loop 131: time: 2018-04-09 19:28:32.720291
gdax: 12428 entries
loop 132: time: 2018-04-09 19:33:34.717905
gdax: 12433 entries
loop 133: time: 2018-04-09 19:38:36.166963
gdax: 12438 entries
loop 134: time: 2018-04-09 19:43:38.110206
gdax: 12443 entries
loop 135: time: 2018-04-09 19:48:40.098675
gdax: 12448 entries
loop 136: time: 2018-04-09 19:53:42.191957
gdax: 12453 entries
loop 137: time: 2018-04-09 19:58:44.214157
gdax: 12458 entries
loop 138: time: 2018-04-09 20:03:46.262616
gdax: 12463 entries
loop 139: time: 2018-04-09 20:08:48.186835
gdax: 12468 entries
loop 140: time: 2018-04-09 20:13:50.217718
gdax: 12473 entries
loop 141: time: 2018-04-09 20:18:52.152402
gdax: 12478 entries
loop 142: time: 2018-04-09 20:23:53.746087
gdax: 12483 entries
loop 143: time: 2018-04-09 20:28:55.742309
gdax: 12487 entries
loop 144: time: 2018-04-09 20:33:57.606864
gdax: 12493 entries
loop 145: time: 2018-04-09 20:38:59.612081
gdax: 12498 