# continuously download

In [1]:
import ccxt
exchange = getattr(ccxt, 'gdax') ()
symbol = 'BTC/USD'
interval = '1m'

In [2]:
# get data
import time
def get_ohlcv(exchange, interval, symbol):
    if exchange.has['fetchOHLCV']:
        time.sleep(exchange.rateLimit / 1000) # time.sleep wants seconds
        gdaxohlcv = exchange.fetch_ohlcv (symbol, interval) # 1 minute intervals
        return np.array(gdaxohlcv)

In [3]:
# get order book and make statistics on it
import pandas as pd
import numpy as np
from datetime import datetime

def get_orderbook_now(exchange, symbol):
    ob = exchange.fetch_order_book(symbol)
#     print(ob)
    ob_asks = np.array(ob['asks'])
    ob_bids = np.array(ob['bids'])
    ob_stats = {}
    ob_stats['time'] = datetime.now().timestamp()
    ob_stats['ask_vol'] = np.sum(ob_asks[:, 1])
    ob_stats['ask_stdovermean_price'] = np.std(ob_asks[:, 0])/np.mean(ob_asks[:, 0])
    ob_stats['ask_spread_price'] = ob_asks[-1, 0] - ob_asks[0, 0]
    ob_stats['ask_closest_price'] = ob_asks[0, 0]
    ob_stats['ask_closest_vol'] = ob_asks[0, 1]
    ob_stats['ask_weighted_mean_price'] = np.sum((ob_asks[:, 0] * ob_asks[:, 1])/np.sum(ob_asks[:, 1]))
    ob_stats['ask_closestpromille_vol'] = np.sum(ob_asks[np.argwhere(ob_asks[:, 0]>.999*ob_asks[0, 0]), 1])
    ob_stats['bid_vol'] = np.sum(ob_bids[:, 1])
    ob_stats['bid_stdovermean_price'] = np.std(ob_bids[:, 0])/np.mean(ob_bids[:, 0])
    ob_stats['bid_spread_price'] = ob_bids[-1, 0] - ob_bids[0, 0]
    ob_stats['bid_closest_price'] = ob_bids[0, 0]
    ob_stats['bid_closest_vol'] = ob_bids[0, 1]
    ob_stats['bid_weighted_mean_price'] = np.sum((ob_bids[:, 0] * ob_bids[:, 1])/np.sum(ob_bids[:, 1]))
    ob_stats['bid_closestpromille_vol'] = np.sum(ob_bids[np.argwhere(ob_bids[:, 0]>.999*ob_bids[0, 0]), 1])
    return ob_stats

columns = ['time',
           'ask_vol', 'ask_spread_price', 'ask_closest_price', 'ask_closest_vol', 'ask_weighted_mean_price',
           'bid_vol', 'bid_spread_price', 'bid_closest_price', 'bid_closest_vol', 'bid_weighted_mean_price']
ob = pd.DataFrame(columns=columns)
ob_stats = get_orderbook_now(exchange, symbol)
ob = ob.append(ob_stats, ignore_index=True)
time.sleep(5)
ob = ob.append(get_orderbook_now(exchange, symbol), ignore_index=True)

print(ob)

           time    ask_vol  ask_spread_price  ask_closest_price  \
0  1.523222e+09  39.204682             17.54            6962.23   
1  1.523222e+09  39.758484             17.54            6962.23   

   ask_closest_vol  ask_weighted_mean_price    bid_vol  bid_spread_price  \
0         0.926554              6975.533599  54.082134            -10.21   
1         0.954554              6975.312726  53.134606            -10.22   

   bid_closest_price  bid_closest_vol  bid_weighted_mean_price  \
0            6962.22        14.064024              6957.307769   
1            6962.22        13.211480              6957.245996   

   ask_closestpromille_vol  ask_stdovermean_price  bid_closestpromille_vol  \
0                39.204682               0.000735                36.439545   
1                39.758484               0.000742                35.476973   

   bid_stdovermean_price  
0               0.000407  
1               0.000421  


In [None]:
# main loop to download order book data
import time
from datetime import datetime
import ccxt

interval = 60  # seconds

gdax = getattr(ccxt, 'gdax') ()

ob_gdax = pd.DataFrame(columns=columns)
i=0
currweek = ''
while True:
    if currweek != datetime.now().strftime('%U'):  # empty dataset after a week
        ob_gdax = pd.DataFrame(columns=columns)
        currweek = datetime.now().strftime('%U')
    try:
        ob_dict = get_orderbook_now(gdax, symbol)
        ob_gdax = ob_gdax.append(ob_dict, ignore_index=True)
        ob_gdax.to_excel('gdax_orderbook_'+datetime.now().strftime('%Y-%U')+'.xlsx')
    except Exception as e:
        print('timeout, going on')
        print(e)
    print('loop {:d}:'.format(i), 'time:', datetime.now())
    print('gdax:', len(ob_gdax), 'entries') 
    i+=1
    time.sleep(interval) # one minute interval

timeout, going on
No module named 'openpyxl'
loop 0: time: 2018-04-08 23:11:35.519619
gdax: 1 entries
timeout, going on
No module named 'openpyxl'
loop 1: time: 2018-04-08 23:12:36.124385
gdax: 2 entries
loop 2: time: 2018-04-08 23:13:36.646099
gdax: 3 entries
loop 3: time: 2018-04-08 23:14:37.286406
gdax: 4 entries
loop 4: time: 2018-04-08 23:15:37.911570
gdax: 5 entries
loop 5: time: 2018-04-08 23:16:38.537399
gdax: 6 entries
loop 6: time: 2018-04-08 23:17:39.356026
gdax: 7 entries
loop 7: time: 2018-04-08 23:18:39.617141
gdax: 8 entries
loop 8: time: 2018-04-08 23:19:40.187016
gdax: 9 entries
loop 9: time: 2018-04-08 23:20:40.814921
gdax: 10 entries
loop 10: time: 2018-04-08 23:21:41.064457
gdax: 11 entries
loop 11: time: 2018-04-08 23:22:41.854920
gdax: 12 entries
loop 12: time: 2018-04-08 23:23:42.472496
gdax: 13 entries
loop 13: time: 2018-04-08 23:24:43.100989
gdax: 14 entries
loop 14: time: 2018-04-08 23:25:43.717684
gdax: 15 entries
loop 15: time: 2018-04-08 23:26:44.346070
gd

loop 137: time: 2018-04-09 01:29:59.837759
gdax: 138 entries
loop 138: time: 2018-04-09 01:31:00.274478
gdax: 139 entries
loop 139: time: 2018-04-09 01:32:00.784868
gdax: 140 entries
loop 140: time: 2018-04-09 01:33:01.204773
gdax: 141 entries
loop 141: time: 2018-04-09 01:34:01.896212
gdax: 142 entries
loop 142: time: 2018-04-09 01:35:02.754229
gdax: 143 entries
loop 143: time: 2018-04-09 01:36:03.567160
gdax: 144 entries
loop 144: time: 2018-04-09 01:37:03.978012
gdax: 145 entries
loop 145: time: 2018-04-09 01:38:04.427324
gdax: 146 entries
loop 146: time: 2018-04-09 01:39:05.198509
gdax: 147 entries
loop 147: time: 2018-04-09 01:40:06.070515
gdax: 148 entries
loop 148: time: 2018-04-09 01:41:06.881020
gdax: 149 entries
loop 149: time: 2018-04-09 01:42:07.310229
gdax: 150 entries
loop 150: time: 2018-04-09 01:43:07.839375
gdax: 151 entries
loop 151: time: 2018-04-09 01:44:08.349735
gdax: 152 entries
loop 152: time: 2018-04-09 01:45:09.103220
gdax: 153 entries
loop 153: time: 2018-04-

loop 272: time: 2018-04-09 03:46:44.039897
gdax: 273 entries
loop 273: time: 2018-04-09 03:47:44.976677
gdax: 274 entries
loop 274: time: 2018-04-09 03:48:45.620773
gdax: 275 entries
loop 275: time: 2018-04-09 03:49:46.743868
gdax: 276 entries
loop 276: time: 2018-04-09 03:50:47.702331
gdax: 277 entries
loop 277: time: 2018-04-09 03:51:48.739439
gdax: 278 entries
loop 278: time: 2018-04-09 03:52:49.397914
gdax: 279 entries
loop 279: time: 2018-04-09 03:53:50.374827
gdax: 280 entries
loop 280: time: 2018-04-09 03:54:51.228355
gdax: 281 entries
loop 281: time: 2018-04-09 03:55:51.882341
gdax: 282 entries
loop 282: time: 2018-04-09 03:56:53.280410
gdax: 283 entries
loop 283: time: 2018-04-09 03:57:53.855138
gdax: 284 entries
loop 284: time: 2018-04-09 03:58:54.815835
gdax: 285 entries
loop 285: time: 2018-04-09 03:59:55.888755
gdax: 286 entries
loop 286: time: 2018-04-09 04:00:56.853805
gdax: 287 entries
loop 287: time: 2018-04-09 04:01:57.470933
gdax: 288 entries
loop 288: time: 2018-04-

loop 404: time: 2018-04-09 06:01:05.791704
gdax: 404 entries
loop 405: time: 2018-04-09 06:02:06.799374
gdax: 405 entries
loop 406: time: 2018-04-09 06:03:07.914889
gdax: 406 entries
loop 407: time: 2018-04-09 06:04:08.994217
gdax: 407 entries
loop 408: time: 2018-04-09 06:05:10.107290
gdax: 408 entries
loop 409: time: 2018-04-09 06:06:10.801166
gdax: 409 entries
loop 410: time: 2018-04-09 06:07:11.558100
gdax: 410 entries
loop 411: time: 2018-04-09 06:08:12.632568
gdax: 411 entries
loop 412: time: 2018-04-09 06:09:13.703149
gdax: 412 entries
loop 413: time: 2018-04-09 06:10:14.785732
gdax: 413 entries
loop 414: time: 2018-04-09 06:11:15.826591
gdax: 414 entries
loop 415: time: 2018-04-09 06:12:16.933382
gdax: 415 entries
loop 416: time: 2018-04-09 06:13:17.960072
gdax: 416 entries
loop 417: time: 2018-04-09 06:14:18.728500
gdax: 417 entries
loop 418: time: 2018-04-09 06:15:19.880443
gdax: 418 entries
loop 419: time: 2018-04-09 06:16:20.985251
gdax: 419 entries
loop 420: time: 2018-04-

loop 539: time: 2018-04-09 08:18:40.061169
gdax: 539 entries
loop 540: time: 2018-04-09 08:19:41.366385
gdax: 540 entries
loop 541: time: 2018-04-09 08:20:42.670052
gdax: 541 entries
loop 542: time: 2018-04-09 08:21:43.952241
gdax: 542 entries
loop 543: time: 2018-04-09 08:22:45.258061
gdax: 543 entries
loop 544: time: 2018-04-09 08:23:46.541185
gdax: 544 entries
loop 545: time: 2018-04-09 08:24:47.850456
gdax: 545 entries
loop 546: time: 2018-04-09 08:25:49.150733
gdax: 546 entries
loop 547: time: 2018-04-09 08:26:50.644214
gdax: 547 entries
loop 548: time: 2018-04-09 08:27:51.949786
gdax: 548 entries
loop 549: time: 2018-04-09 08:28:53.273356
gdax: 549 entries
loop 550: time: 2018-04-09 08:29:54.586930
gdax: 550 entries
loop 551: time: 2018-04-09 08:30:55.804269
gdax: 551 entries
loop 552: time: 2018-04-09 08:31:56.806024
gdax: 552 entries
loop 553: time: 2018-04-09 08:32:58.077208
gdax: 553 entries
loop 554: time: 2018-04-09 08:33:59.470116
gdax: 554 entries
loop 555: time: 2018-04-

loop 674: time: 2018-04-09 10:36:40.006598
gdax: 674 entries
loop 675: time: 2018-04-09 10:37:41.431512
gdax: 675 entries
loop 676: time: 2018-04-09 10:38:42.824694
gdax: 676 entries
loop 677: time: 2018-04-09 10:39:43.963752
gdax: 677 entries
loop 678: time: 2018-04-09 10:40:45.423847
gdax: 678 entries
loop 679: time: 2018-04-09 10:41:46.518818
gdax: 679 entries
loop 680: time: 2018-04-09 10:42:48.043000
gdax: 680 entries
loop 681: time: 2018-04-09 10:43:49.486989
gdax: 681 entries
loop 682: time: 2018-04-09 10:44:50.851198
gdax: 682 entries
loop 683: time: 2018-04-09 10:45:52.365244
gdax: 683 entries
loop 684: time: 2018-04-09 10:46:53.928697
gdax: 684 entries
loop 685: time: 2018-04-09 10:47:55.026293
gdax: 685 entries
loop 686: time: 2018-04-09 10:48:56.476885
gdax: 686 entries
loop 687: time: 2018-04-09 10:49:57.568518
gdax: 687 entries
loop 688: time: 2018-04-09 10:50:59.032085
gdax: 688 entries
loop 689: time: 2018-04-09 10:52:00.432938
gdax: 689 entries
loop 690: time: 2018-04-

loop 809: time: 2018-04-09 12:54:59.536818
gdax: 809 entries
loop 810: time: 2018-04-09 12:56:01.190095
gdax: 810 entries
loop 811: time: 2018-04-09 12:57:02.732435
gdax: 811 entries
loop 812: time: 2018-04-09 12:58:04.280940
gdax: 812 entries
loop 813: time: 2018-04-09 12:59:05.833841
gdax: 813 entries
loop 814: time: 2018-04-09 13:00:07.455183
gdax: 814 entries
loop 815: time: 2018-04-09 13:01:09.060049
gdax: 815 entries
loop 816: time: 2018-04-09 13:02:10.261903
gdax: 816 entries
loop 817: time: 2018-04-09 13:03:11.892768
gdax: 817 entries
loop 818: time: 2018-04-09 13:04:13.623920
gdax: 818 entries
loop 819: time: 2018-04-09 13:05:15.220604
gdax: 819 entries
loop 820: time: 2018-04-09 13:06:16.451887
gdax: 820 entries
loop 821: time: 2018-04-09 13:07:18.331971
gdax: 821 entries
loop 822: time: 2018-04-09 13:08:19.946635
gdax: 822 entries
loop 823: time: 2018-04-09 13:09:21.173422
gdax: 823 entries
loop 824: time: 2018-04-09 13:10:22.427181
gdax: 824 entries
loop 825: time: 2018-04-

loop 944: time: 2018-04-09 15:13:42.197897
gdax: 944 entries
loop 945: time: 2018-04-09 15:14:43.871105
gdax: 945 entries
loop 946: time: 2018-04-09 15:15:45.699747
gdax: 946 entries
loop 947: time: 2018-04-09 15:16:47.515073
gdax: 947 entries
loop 948: time: 2018-04-09 15:17:49.194251
gdax: 948 entries
loop 949: time: 2018-04-09 15:18:51.060182
gdax: 949 entries
loop 950: time: 2018-04-09 15:19:52.871392
gdax: 950 entries
loop 951: time: 2018-04-09 15:20:54.590890
gdax: 951 entries
loop 952: time: 2018-04-09 15:21:56.371319
gdax: 952 entries
loop 953: time: 2018-04-09 15:22:58.096471
gdax: 953 entries
loop 954: time: 2018-04-09 15:23:59.976865
gdax: 954 entries
loop 955: time: 2018-04-09 15:25:01.798630
gdax: 955 entries
loop 956: time: 2018-04-09 15:26:03.572346
gdax: 956 entries
loop 957: time: 2018-04-09 15:27:05.262900
gdax: 957 entries
loop 958: time: 2018-04-09 15:28:07.032294
gdax: 958 entries
loop 959: time: 2018-04-09 15:29:08.802240
gdax: 959 entries
loop 960: time: 2018-04-

loop 1076: time: 2018-04-09 17:29:39.316522
gdax: 1076 entries
loop 1077: time: 2018-04-09 17:30:41.237105
gdax: 1077 entries
loop 1078: time: 2018-04-09 17:31:43.098230
gdax: 1078 entries
loop 1079: time: 2018-04-09 17:32:45.052940
gdax: 1079 entries
loop 1080: time: 2018-04-09 17:33:47.002337
gdax: 1080 entries
loop 1081: time: 2018-04-09 17:34:49.001948
gdax: 1081 entries
loop 1082: time: 2018-04-09 17:35:51.096781
gdax: 1082 entries
loop 1083: time: 2018-04-09 17:36:53.057179
gdax: 1083 entries
loop 1084: time: 2018-04-09 17:37:54.884625
gdax: 1084 entries
loop 1085: time: 2018-04-09 17:38:56.756799
gdax: 1085 entries
loop 1086: time: 2018-04-09 17:39:58.651946
gdax: 1086 entries
loop 1087: time: 2018-04-09 17:41:00.603800
gdax: 1087 entries
loop 1088: time: 2018-04-09 17:42:02.539361
gdax: 1088 entries
loop 1089: time: 2018-04-09 17:43:04.458342
gdax: 1089 entries
loop 1090: time: 2018-04-09 17:44:06.412655
gdax: 1090 entries
loop 1091: time: 2018-04-09 17:45:08.366878
gdax: 1091 

loop 1207: time: 2018-04-09 19:44:58.316283
gdax: 1207 entries
loop 1208: time: 2018-04-09 19:46:00.018138
gdax: 1208 entries
loop 1209: time: 2018-04-09 19:47:02.153273
gdax: 1209 entries
loop 1210: time: 2018-04-09 19:48:04.329764
gdax: 1210 entries
loop 1211: time: 2018-04-09 19:49:06.324052
gdax: 1211 entries
loop 1212: time: 2018-04-09 19:50:08.613123
gdax: 1212 entries
loop 1213: time: 2018-04-09 19:51:10.673416
gdax: 1213 entries
loop 1214: time: 2018-04-09 19:52:12.440733
gdax: 1214 entries
loop 1215: time: 2018-04-09 19:53:14.609963
gdax: 1215 entries
loop 1216: time: 2018-04-09 19:54:16.726256
gdax: 1216 entries
loop 1217: time: 2018-04-09 19:55:18.994700
gdax: 1217 entries
loop 1218: time: 2018-04-09 19:56:21.150948
gdax: 1218 entries
loop 1219: time: 2018-04-09 19:57:23.167871
gdax: 1219 entries
loop 1220: time: 2018-04-09 19:58:25.299245
gdax: 1220 entries
loop 1221: time: 2018-04-09 19:59:27.414866
gdax: 1221 entries
loop 1222: time: 2018-04-09 20:00:29.643858
gdax: 1222 

In [3]:
# concatenate data
def concat(old_data, add_data):
    """input: 2d ndarray
    newest first
    """
    if old_data.size == 0:
        return add_data
    if add_data.size == 0:
        return old_data
    overlap = np.argwhere(old_data[:, 0] == add_data[-1, 0])
    if overlap.size == 0:
        overlap = 0
    else:
        overlap = overlap[0][0]
    old_data = old_data[overlap+1:, :]
    new_data = np.vstack((add_data, old_data))
    return new_data


In [4]:
# import & export csv
import csv
def save_data(array, name):
    np.savetxt(name, array, delimiter=',')

def load_data(name):
    return np.genfromtxt(name, delimiter=',')

In [None]:
# main loop
import time
from datetime import datetime
import ccxt

gdax = getattr(ccxt, 'gdax') ()
bitfinex = getattr(ccxt, 'bitfinex') ()

all_data = np.array([[]])
all_data = load_data('gdax_data.csv')
all_data_bf = np.array([[]])
all_data_bf = load_data('bitfinex_data.csv')
i=0
while True:
    try:
        recent_data = get_ohlcv(gdax, interval, symbol)
        all_data = concat(all_data, recent_data)
        save_data(all_data, 'gdax_data.csv')
    except Exception as e:
        print('timeout, going on')
        print(e)
    print('loop {:d}:'.format(i), 'time:', datetime.now())
    print('gdax:', len(all_data), 'entries') 
#     try:
#         recent_data_bf = get_ohlcv(bitfinex, interval, symbol)
#         all_data_bf = concat(all_data_bf, recent_data_bf)
#         save_data(all_data_bf, 'bitfinex_data.csv')
#     except Exception as e:
#         print('timeout, going on')
#         print(e)
#     print('bitfinex:', len(all_data_bf), 'entries')
    i+=1
    time.sleep(5 * 60) # every half an hour
    

loop 0: time: 2018-04-04 10:47:37.729051
gdax: 4707 entries
