<a href="https://colab.research.google.com/github/TheDoctorAI/AI-Trader/blob/master/download_market.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install ccxt pprint
# -*- coding: utf-8 -*-
import ccxt
from datetime import datetime
import csv
from google.colab import drive
from pprint import pprint



In [2]:
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
def write_to_csv(filename, data):
    with open(filename, mode='w') as output_file:
        output_file.write("Date,Open,High,Low,Close,Adj Close,Volume\n")
        csv_writer = csv.writer(output_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerows(data)

In [0]:
def scrape_ohlcv(exchange, max_retries, symbol, timeframe, since, limit):
    earliest_timestamp = exchange.milliseconds()
    timeframe_duration_in_seconds = exchange.parse_timeframe(timeframe)
    timeframe_duration_in_ms = timeframe_duration_in_seconds * 1000
    timedelta = limit * timeframe_duration_in_ms
    all_ohlcv = []
    while True:
        fetch_since = earliest_timestamp - timedelta
        ohlcv = retry_fetch_ohlcv(exchange, max_retries, symbol, timeframe, fetch_since, limit)
        if ohlcv[0][0] >= earliest_timestamp:
            break
        earliest_timestamp = ohlcv[0][0]
        all_ohlcv = ohlcv + all_ohlcv
        print(len(all_ohlcv), 'candles in total from', exchange.iso8601(all_ohlcv[0][0]), 'to', exchange.iso8601(all_ohlcv[-1][0]))
        if fetch_since < since:
            break
    return all_ohlcv

In [0]:
def retry_fetch_ohlcv(exchange, max_retries, symbol, timeframe, since, limit):
    num_retries = 0
    try:
        num_retries += 1
        ohlcv = exchange.fetch_ohlcv(symbol, timeframe, since)
        return ohlcv
    except Exception:
        if num_retries > max_retries:
            raise

In [0]:
def scrape_candles_to_csv(filename, exchange_id, max_retries, symbol, timeframe, since, limit):
    exchange = getattr(ccxt, exchange_id)({
        'enableRateLimit': True,
    })
    if isinstance(since, str):
        since = exchange.parse8601(since)
    exchange.load_markets()
    ohlcv = scrape_ohlcv(exchange, max_retries, symbol, timeframe, since, limit)
    key = 0
    for candle in ohlcv:
        epoch = int(candle[0]) / 1000
        ohlcv[key][0] = datetime.utcfromtimestamp(epoch).strftime('%Y-%m-%d')
        ohlcv[key][5] = int(candle[5])
        ohlcv[key].append(ohlcv[key][5])
        ohlcv[key][5] = ohlcv[key][4]
        key += 1
    ohlen = len(ohlcv)
    pprint("num of candles: "+ str(ohlen))
    if ohlen > 399:
        ohrem = ohlen - 399
        pprint("removing: "+str(ohrem))
        ohlcv = ohlcv[ohrem:]
    write_to_csv(filename, ohlcv)
    print('Saved', len(ohlcv), 'candles from', exchange.iso8601(ohlcv[0][0]), 'to', exchange.iso8601(ohlcv[-1][0]), 'to', filename)

In [7]:
exchange = "bitmex"
symbol = "BTC/USD"
start_date = "2019-04-01T00:00:00Z"
timeframe = "1h"
outfile = "/content/gdrive/My Drive/Colab Notebooks/ai-trader/data/btcusd-1h.csv"


scrape_candles_to_csv(outfile, exchange, 3, symbol, timeframe, start_date, 100)

100 candles in total from 2019-04-23T02:00:00.000Z to 2019-04-27T05:00:00.000Z
200 candles in total from 2019-04-18T22:00:00.000Z to 2019-04-27T05:00:00.000Z
300 candles in total from 2019-04-14T18:00:00.000Z to 2019-04-27T05:00:00.000Z
400 candles in total from 2019-04-10T14:00:00.000Z to 2019-04-27T05:00:00.000Z
500 candles in total from 2019-04-06T10:00:00.000Z to 2019-04-27T05:00:00.000Z
600 candles in total from 2019-04-02T06:00:00.000Z to 2019-04-27T05:00:00.000Z
700 candles in total from 2019-03-29T02:00:00.000Z to 2019-04-27T05:00:00.000Z
'num of candles: 700'
'removing: 301'
Saved 399 candles from None to None to /content/gdrive/My Drive/Colab Notebooks/ai-trader/data/btcusd-1h.csv
