In [104]:
import pandas as pd
import numpy as np
from os import getenv
from binance.client import Client # pip install python-binance
from binance.websockets import BinanceSocketManager
from twisted.internet import reactor
import math
import os.path
import time
from datetime import timedelta, datetime
from dateutil import parser
from tqdm import tqdm_notebook #(Optional, used for progress-bars)
import os
import glob
# Statistical Arbitrage - Pair Trading Strategy

In [105]:
# Binance REST client. The API key and secret are read from the environment
# variables `binance_api` and `binance_secret`.
# NOTE(review): the earlier comment here said "binance.us", but no endpoint/tld
# argument is passed to Client -- confirm which exchange this should target.
binance_client = Client(
    getenv('binance_api'),
    getenv('binance_secret'),
)

# Candle interval label -> length of one candle in minutes.
binsizes = {
    "1m": 1,
    "5m": 5,
    "1h": 60,
    "1d": 1440,
}

batch_size = 750

In [106]:
def minutes_of_new_data(symbol, kline_size, data, source):
    """Return the ``(old, new)`` time bounds of the candles still to download.

    Parameters
    ----------
    symbol : str
        Trading pair passed to ``binance_client.get_klines``.
    kline_size : str
        Candle interval passed to ``binance_client.get_klines``.
    data : pandas.DataFrame
        Previously stored candles. When non-empty, the last value of its
        ``"timestamp"`` column is used as the lower bound.
    source : str
        Data source; only ``"binance"`` is supported.

    Returns
    -------
    tuple
        ``old`` is the last stored timestamp, or 1 Jan 2017 when ``data`` is
        empty. ``new`` is the first field of the last kline returned by
        ``get_klines``, interpreted as milliseconds since the epoch.

    Raises
    ------
    ValueError
        If ``source`` is not ``"binance"``.
    """
    # Validate up front: an unknown source used to leave `old`/`new`
    # unassigned, so the function failed with UnboundLocalError at `return`.
    if source != "binance":
        raise ValueError('Unsupported data source: %r (only "binance" is supported)' % (source,))

    if len(data) > 0:
        # str() lets this accept both raw CSV strings and already-parsed
        # Timestamp values; parser.parse only takes strings.
        old = parser.parse(str(data["timestamp"].iloc[-1]))
    else:
        old = datetime.strptime('1 Jan 2017', '%d %b %Y')

    latest_kline = binance_client.get_klines(symbol=symbol, interval=kline_size)[-1]
    new = pd.to_datetime(latest_kline[0], unit='ms')
    return old, new

def get_all_binance(symbol, kline_size, save = False):
    """Download candles for ``symbol`` and merge them with any cached CSV.

    The cache file is ``'<symbol>-<kline_size>-data.csv'``, a relative path
    resolved against the current working directory. When it exists, only the
    range from its last timestamp up to the newest available candle is
    requested; otherwise the request starts at 1 Jan 2017.

    Parameters
    ----------
    symbol : str
        Trading pair to download.
    kline_size : str
        Candle interval; must be a key of ``binsizes``.
    save : bool, default False
        When true, write the merged frame back to the cache file.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``timestamp`` with columns ``open``, ``high``, ``low``,
        ``close`` and ``symbol``.

    Raises
    ------
    KeyError
        If ``kline_size`` is not a key of ``binsizes``.
    """
    filename = '%s-%s-data.csv' % (symbol, kline_size)
    if os.path.isfile(filename):
        data_df = pd.read_csv(filename)
    else:
        data_df = pd.DataFrame()

    oldest_point, newest_point = minutes_of_new_data(symbol, kline_size, data_df, source = "binance")
    delta_min = (newest_point - oldest_point).total_seconds()/60
    available_data = math.ceil(delta_min/binsizes[kline_size])
    if oldest_point == datetime.strptime('1 Jan 2017', '%d %b %Y'):
        print('Downloading all available %s data for %s. Be patient..!' % (kline_size, symbol))
    else:
        print('Downloading %d minutes of new data available for %s, i.e. %d instances of %s data.' % (delta_min, symbol, available_data, kline_size))

    klines = binance_client.get_historical_klines(symbol, kline_size, oldest_point.strftime("%d %b %Y %H:%M:%S"), newest_point.strftime("%d %b %Y %H:%M:%S"))

    # Keep only the first five fields of every kline; slicing builds new rows
    # instead of mutating the list returned by the client.
    price_columns = ['open', 'high', 'low', 'close']
    data = pd.DataFrame([line[:5] for line in klines], columns = ['timestamp'] + price_columns)
    data['timestamp'] = pd.to_datetime(data['timestamp'], unit='ms')
    # Presumably the API delivers prices as strings (verify against the Binance
    # kline docs); cast so fresh rows match the numeric dtype of cached rows.
    data[price_columns] = data[price_columns].astype(float)
    data['symbol'] = symbol

    if len(data_df) > 0:
        # Cached timestamps come back from CSV as strings; parse them so the
        # merged index holds a single type.
        data_df['timestamp'] = pd.to_datetime(data_df['timestamp'])
        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        data_df = pd.concat([data_df, data], ignore_index=True)
        # The request starts at the last cached timestamp, so that candle is
        # fetched again. Keep the fresh copy and drop the stale one.
        data_df = data_df.drop_duplicates(subset='timestamp', keep='last')
    else:
        data_df = data

    data_df = data_df.set_index('timestamp')
    if save:
        data_df.to_csv(filename)
    print('All caught up..!')
    return data_df

In [None]:
# Intervals accepted by the API: 1m, 3m, 5m, 15m, 30m, 1h, 2h, 4h, 6h, 8h, 12h, 1d, 3d, 1w, 1M
# Each symbol's CSV is written by get_all_binance under a relative filename,
# i.e. into the current working directory.
tickers = binance_client.get_all_tickers()
binance_symbols = [ticker['symbol'] for ticker in tickers]

# Download (or refresh) the daily candles for every listed symbol.
for symbol in binance_symbols:
    get_all_binance(symbol, '1d', save = True)

In [109]:
# Collect only the per-symbol files written by get_all_binance
# ('<symbol>-<interval>-data.csv'). A plain "*.csv" also matches
# 'combined_data.csv' produced below, so re-running this cell would fold the
# previous combined output back into the new one.
globbed_files = glob.glob("*-data.csv")

data = []  # pd.concat takes a list of DataFrames as an argument
for csv_path in globbed_files:
    frame = pd.read_csv(csv_path)
    # Tag each row with its source file name; the symbol is recovered from it later.
    frame['symbol'] = os.path.basename(csv_path)
    data.append(frame)

# ignore_index: we don't want pandas to try to align the per-file row indexes
combined_data = pd.concat(data, ignore_index=True)
# Export to CSV
combined_data.to_csv('combined_data.csv', index=False, encoding='utf-8-sig')

In [113]:
# Reload the combined file, reduce each source file name to the part before
# the first "-", index by timestamp and keep the symbol and price columns in a
# fixed order.
test = pd.read_csv('combined_data.csv')
test = (
    test
    .assign(symbol=test['symbol'].map(lambda file_name: file_name.split("-")[0]))
    .set_index('timestamp')
    .loc[:, ['symbol', 'open', 'high', 'low', 'close']]
)

In [114]:
# Show the combined frame; the rendered output below is a truncated view (note the "..." row).
test

Unnamed: 0_level_0,symbol,open,high,low,close
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-11-01,ARKBTC,0.004980,0.004980,0.000301,0.000319
2017-11-02,ARKBTC,0.000319,0.000363,0.000256,0.000300
2017-11-03,ARKBTC,0.000297,0.000369,0.000283,0.000346
2017-11-04,ARKBTC,0.000349,0.000367,0.000315,0.000315
2017-11-05,ARKBTC,0.000315,0.000340,0.000296,0.000321
...,...,...,...,...,...
2021-02-09,ARPABNB,0.000381,0.000393,0.000283,0.000307
2021-02-10,ARPABNB,0.000308,0.000308,0.000226,0.000270
2021-02-11,ARPABNB,0.000272,0.000330,0.000268,0.000311
2021-02-12,ARPABNB,0.000310,0.000349,0.000297,0.000319
