In [1]:
"""
Downloads data of Crypto Currencies available on Bitmex and Binance
"""

'\nDownloads data of Crypto Currencies available on Bitmex and Binance\n'

In [2]:
# Install the Bitmex and Binance client packages that the import cell below relies on.
!pip install bitmex
!pip install python-binance

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
# Mount Google Drive at /content/drive; the Binance output folder configured
# further down (BIN_PATH) lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# IMPORTS
import math
import os.path
import time
from datetime import timedelta, datetime
from bitmex import bitmex
from binance.client import Client
from dateutil import parser
from tqdm import tqdm_notebook
import pandas as pd

In [5]:
def mkdir(folder_name):
    """Create the folder where data is going to be stored.

    Parameters
    ----------
    folder_name : str
        Path of the directory to create. An empty string is ignored.

    Notes
    -----
    Missing parent directories are created too, and an already existing
    directory is left untouched, so the call is safe to repeat.
    """
    if folder_name != '':
        # os.mkdir only creates the last path component and fails when a parent
        # is missing; makedirs handles nested output paths in one call.
        os.makedirs(folder_name, exist_ok=True)

In [6]:
def minutes_of_new_data(symbol, kline_size, data, source):
    """Return the time range that still has to be downloaded.

    Despite the name, the function returns two points in time, not a number
    of minutes; callers compute the difference themselves.

    Parameters
    ----------
    symbol : str
        Instrument symbol passed through to the exchange client.
    kline_size : str
        Candle interval label passed through to the exchange client.
    data : pandas.DataFrame
        Previously downloaded rows. When it is non-empty, the last value of
        its ``timestamp`` column becomes the start of the range.
    source : str
        Either ``"binance"`` or ``"bitmex"``.

    Returns
    -------
    tuple
        ``(old, new)``: where the download should start and the timestamp of
        the most recent candle the exchange reports.

    Raises
    ------
    ValueError
        If ``source`` is not one of the supported exchanges.
    """
    # Fail early with a clear message; previously an unknown source left
    # `old`/`new` unassigned and surfaced as an UnboundLocalError.
    if source not in ("binance", "bitmex"):
        raise ValueError(f"Unknown source {source!r}; expected 'binance' or 'bitmex'")

    if len(data) > 0:
        last_seen = data["timestamp"].iloc[-1]
        if isinstance(last_seen, str):
            old = parser.parse(last_seen)
        else:
            # parser.parse only accepts text; datetime-like values are converted directly.
            old = pd.to_datetime(last_seen)
    elif source == "binance":
        # Fixed start date used when nothing has been downloaded yet.
        old = datetime.strptime('1 Jan 2017', '%d %b %Y')
    else:
        # reverse=False with count=1: presumably the earliest bucket Bitmex has — TODO confirm.
        old = bitmex_client.Trade.Trade_getBucketed(
            symbol=symbol, binSize=kline_size, count=1, reverse=False
        ).result()[0][0]['timestamp']

    if source == "binance":
        # First field of the last kline returned, interpreted as epoch milliseconds.
        latest_kline = binance_client.get_klines(symbol=symbol, interval=kline_size)[-1]
        new = pd.to_datetime(latest_kline[0], unit='ms')
    else:
        # reverse=True with count=1: presumably the most recent bucket — TODO confirm.
        new = bitmex_client.Trade.Trade_getBucketed(
            symbol=symbol, binSize=kline_size, count=1, reverse=True
        ).result()[0][0]['timestamp']
    return old, new

In [7]:
def get_all_binance(symbol, kline_size, save = False):
    """Download kline data from Binance, resuming from a previously saved pickle.

    Parameters
    ----------
    symbol : str
        Binance symbol, e.g. ``'BTCUSDT'``.
    kline_size : str
        Candle interval label; must be a key of the global ``binsizes``.
    save : bool, default False
        When true, the combined frame is pickled to ``BIN_PATH + filename``.

    Returns
    -------
    pandas.DataFrame
        All rows known for the symbol/interval, indexed by ``timestamp``.

    Notes
    -----
    Relies on the notebook globals ``BIN_PATH``, ``binsizes``,
    ``binance_client`` and on ``minutes_of_new_data``.
    """
    filename = f'{symbol}-{kline_size}-data.pkl'
    # Look for the cache in the same place `save=True` writes it, and read it
    # with the pickle reader that matches `to_pickle`.
    stored_path = BIN_PATH + filename
    if os.path.isfile(stored_path):
        # NOTE: only load pickles written by this notebook; unpickling
        # untrusted files can execute arbitrary code.
        # The saved frame is indexed by timestamp; bring it back as a column.
        data_df = pd.read_pickle(stored_path).reset_index()
    else:
        data_df = pd.DataFrame()
    resuming = len(data_df) > 0

    # minutes_of_new_data parses the last timestamp from text, so hand it a
    # one-row string view instead of the datetime column.
    resume_view = data_df[['timestamp']].tail(1).astype(str) if resuming else data_df
    oldest_point, newest_point = minutes_of_new_data(symbol, kline_size, resume_view,
                                                     source = "binance")
    delta_min = (newest_point - oldest_point).total_seconds()/60
    available_data = math.ceil(delta_min/binsizes[kline_size])
    if not resuming:
        print(f'Downloading all available {kline_size} data for {symbol}. Be patient..!')
    else:
        print(f'Downloading {delta_min} minutes of new data available for {symbol}, i.e.' +
              f'{available_data} instances of {kline_size} data.')

    klines = binance_client.get_historical_klines(
        symbol, kline_size,
        oldest_point.strftime("%d %b %Y %H:%M:%S"),
        newest_point.strftime("%d %b %Y %H:%M:%S"))
    data = pd.DataFrame(klines, columns = ['timestamp', 'open', 'high', 'low', 'close',
     'volume', 'close_time', 'quote_av', 'trades', 'tb_base_av', 'tb_quote_av', 'ignore' ])
    data['timestamp'] = pd.to_datetime(data['timestamp'], unit='ms')

    if resuming:
        # DataFrame.append was removed in pandas 2.0; concat is the supported form
        # and matches what get_all_bitmex already uses.
        data_df = pd.concat([data_df, data], axis = 0)
        # The request starts at the last stored timestamp, so that candle may
        # come back again; keep the freshly downloaded copy.
        data_df = data_df.drop_duplicates(subset='timestamp', keep='last')
    else:
        data_df = data
    data_df = data_df.set_index('timestamp')
    if save:
        data_df.to_pickle(stored_path)
    print('All caught up..!')
    return data_df

In [8]:

def get_all_bitmex(symbol, kline_size, save = False):
    """Download bucketed trade data from Bitmex, resuming from a saved pickle.

    Parameters
    ----------
    symbol : str
        Bitmex symbol.
    kline_size : str
        Bucket size label; must be a key of the global ``binsizes``.
    save : bool, default False
        When true and new data was fetched, the combined frame is pickled to
        ``BIT_PATH + filename``.

    Returns
    -------
    pandas.DataFrame
        All rows known for the symbol/interval, indexed by ``timestamp`` when
        that column is present.

    Notes
    -----
    Relies on the notebook globals ``BIT_PATH``, ``BATCH_SIZE``, ``binsizes``,
    ``bitmex_client`` and on ``minutes_of_new_data``.
    """
    filename = f'{symbol}-{kline_size}-data.pkl'
    # Look for the cache in the same place `save=True` writes it, and read it
    # with the pickle reader that matches `to_pickle`.
    stored_path = BIT_PATH + filename
    if os.path.isfile(stored_path):
        # NOTE: only load pickles written by this notebook; unpickling
        # untrusted files can execute arbitrary code.
        # The saved frame is indexed by timestamp; bring it back as a column.
        data_df = pd.read_pickle(stored_path).reset_index()
    else:
        data_df = pd.DataFrame()
    resuming = len(data_df) > 0

    # minutes_of_new_data parses the last timestamp from text, so hand it a
    # one-row string view instead of the datetime column.
    resume_view = data_df[['timestamp']].tail(1).astype(str) if resuming else data_df
    oldest_point, newest_point = minutes_of_new_data(symbol, kline_size, resume_view,
                                                     source = "bitmex")
    delta_min = (newest_point - oldest_point).total_seconds()/60
    available_data = math.ceil(delta_min/binsizes[kline_size])
    rounds = math.ceil(available_data / BATCH_SIZE)
    if rounds > 0:
        print(f'Downloading {delta_min} minutes of new data available for {symbol}, i.e.' +
              f'{available_data} instances of {kline_size} data.')
        # Collect the batches and concatenate once; concatenating inside the
        # loop re-copies every previously fetched row on each round.
        frames = [data_df] if resuming else []
        for round_num in tqdm_notebook(range(rounds)):
            # Pause between requests, presumably to stay under the API rate limit.
            time.sleep(1)
            new_time = oldest_point + timedelta(
                minutes = round_num * BATCH_SIZE * binsizes[kline_size])
            data = bitmex_client.Trade.Trade_getBucketed(
                symbol=symbol, binSize=kline_size,
                count=BATCH_SIZE, startTime = new_time).result()[0]
            frames.append(pd.DataFrame(data))
        data_df = pd.concat(frames, axis = 0)
        if 'timestamp' in data_df.columns:
            # The first batch starts at the last stored timestamp, so that
            # bucket may come back again; keep the freshly downloaded copy.
            data_df = data_df.drop_duplicates(subset='timestamp', keep='last')
    # An empty result has no timestamp column to index by.
    if 'timestamp' in data_df.columns:
        data_df = data_df.set_index('timestamp')
    if save and rounds > 0:
        data_df.to_pickle(stored_path)
    print('All caught up..!')
    return data_df

In [9]:
# Output paths
# Folder that get_all_binance pickles its results into (under the Drive mount).
BIN_PATH = '/content/drive/MyDrive/Crypto Data/Pickle/'
# Folder that get_all_bitmex pickles its results into (relative to the working directory).
BIT_PATH = 'BITMEX/'
# Create both folders up front so they exist before anything is saved.
mkdir(BIN_PATH)
mkdir(BIT_PATH)

In [10]:
### API
# Credentials are read from environment variables so that secrets never have to
# be typed into (and saved with) the notebook. An unset variable falls back to
# '', the value the notebook used before.
# Set BITMEX_API_KEY to your own API key
BITMEX_API_KEY = os.environ.get('BITMEX_API_KEY', '')
# Set BITMEX_API_SECRET to your own API secret
BITMEX_API_SECRET = os.environ.get('BITMEX_API_SECRET', '')
# Set BINANCE_API_KEY to your own API key
BINANCE_API_KEY = os.environ.get('BINANCE_API_KEY', '')
# Set BINANCE_API_SECRET to your own API secret
BINANCE_API_SECRET = os.environ.get('BINANCE_API_SECRET', '')

In [11]:
### CONSTANTS
# Interval label -> length of that interval in minutes (the download helpers
# divide minute spans by it and use it to step through time).
binsizes = {"1m": 1, "5m": 5, "15m":15, "30m":30,"1h": 60, "1d": 1440}
# Number of buckets requested per Bitmex call (passed as `count` in get_all_bitmex).
BATCH_SIZE = 750
# Exchange clients used as globals by minutes_of_new_data and the download helpers.
bitmex_client = bitmex(test=False, api_key=BITMEX_API_KEY, api_secret=BITMEX_API_SECRET)
binance_client = Client(api_key=BINANCE_API_KEY, api_secret=BINANCE_API_SECRET)

  ref_dict['$ref'], '/'.join(path),


In [12]:
# Symbols to download and the candle intervals to fetch for each of them;
# every interval listed here must be a key of `binsizes`.
ticker = ['ETHUSDT', 'BTCUSDT', 'XRPUSDT', 'BUSDUSDT', 'USDCUSDT', 'BNBUSDT', 'ADAUSDT', 'LTCUSDT']
size = ["5m", "15m", "30m", "1h"]

In [13]:
# Every (symbol, interval) combination, grouped by symbol.
batches = [(symbol, interval) for symbol in ticker for interval in size]

In [14]:
# Show the (symbol, interval) pairs that are about to be downloaded.
batches

[('ETHUSDT', '5m'),
 ('ETHUSDT', '15m'),
 ('ETHUSDT', '30m'),
 ('ETHUSDT', '1h'),
 ('BTCUSDT', '5m'),
 ('BTCUSDT', '15m'),
 ('BTCUSDT', '30m'),
 ('BTCUSDT', '1h'),
 ('XRPUSDT', '5m'),
 ('XRPUSDT', '15m'),
 ('XRPUSDT', '30m'),
 ('XRPUSDT', '1h'),
 ('BUSDUSDT', '5m'),
 ('BUSDUSDT', '15m'),
 ('BUSDUSDT', '30m'),
 ('BUSDUSDT', '1h'),
 ('USDCUSDT', '5m'),
 ('USDCUSDT', '15m'),
 ('USDCUSDT', '30m'),
 ('USDCUSDT', '1h'),
 ('BNBUSDT', '5m'),
 ('BNBUSDT', '15m'),
 ('BNBUSDT', '30m'),
 ('BNBUSDT', '1h'),
 ('ADAUSDT', '5m'),
 ('ADAUSDT', '15m'),
 ('ADAUSDT', '30m'),
 ('ADAUSDT', '1h'),
 ('LTCUSDT', '5m'),
 ('LTCUSDT', '15m'),
 ('LTCUSDT', '30m'),
 ('LTCUSDT', '1h')]

In [32]:
# Download and save the data for every (symbol, interval) pair. `df` is
# overwritten on each pass, so afterwards it only holds the last pair's frame.
for i in batches:
    df = get_all_binance(i[0], i[1], save = True)

Downloading all available 5m data for ETHUSDT. Be patient..!
All caught up..!
Downloading all available 15m data for ETHUSDT. Be patient..!
All caught up..!
Downloading all available 30m data for ETHUSDT. Be patient..!
All caught up..!
Downloading all available 1h data for ETHUSDT. Be patient..!
All caught up..!
Downloading all available 5m data for BTCUSDT. Be patient..!
All caught up..!
Downloading all available 15m data for BTCUSDT. Be patient..!
All caught up..!
Downloading all available 30m data for BTCUSDT. Be patient..!
All caught up..!
Downloading all available 1h data for BTCUSDT. Be patient..!
All caught up..!
Downloading all available 5m data for XRPUSDT. Be patient..!
All caught up..!
Downloading all available 15m data for XRPUSDT. Be patient..!
All caught up..!
Downloading all available 30m data for XRPUSDT. Be patient..!
All caught up..!
Downloading all available 1h data for XRPUSDT. Be patient..!
All caught up..!
Downloading all available 5m data for BUSDUSDT. Be patient