In [2]:
from requests import Request, Session
from requests.exceptions import ConnectionError, Timeout, TooManyRedirects
import json
import pandas as pd
import dateutil.parser as dp
from datetime import datetime
from datetime import timedelta  
import time
from datetime import timezone
session = Session()
import os


#converts pyton datatime to unix milisecond timestamp utc timezone
def to_utcunix(x):
    return int(x.replace(tzinfo=timezone.utc).timestamp()) * 1000

#Generator for binance price data
def get_data(start, batch_size, end = datetime.now(), symbol = "BTCUSDT", interval = "1m"):
    batch_start = start
    
    while True:
        batch_end = batch_start + timedelta(minutes = batch_size)
        
        if(batch_end > end):
            batch_end = end - timedelta(minutes = 1)
        
            if(batch_start >= end- timedelta(minutes = 1)):
                break
        
        base = "https://api.binance.com/api/v3"

        print(batch_start, batch_end)
        
        url = base + "/klines?symbol={}&interval={}&startTime={}&endTime={}&limit={}".format(symbol, interval,
                                                                                     to_utcunix(batch_start)
                                                                                    , to_utcunix(batch_end), batch_size)
        
        data = session.get(url).json()
        batch_start = batch_end
        
        #sleep to stay under binance maximum call rate
        time.sleep(0.15)
        
        #print(data)
        yield pd.DataFrame.from_dict(data)[[0,1,2,3,4,5]]

#save binance price data as csv
def gen_csv(start, filename, end = datetime.now(), symbol = "BTCUSDT", interval = "1m"):
    batch_size = 1000
    d = get_data(start, batch_size, end = end, symbol = symbol, interval = interval)
        
    try:
        os.mkdir("data_" + interval)
    except:
        pass
    
    fn = "data_" + interval + "/" + filename + ".csv"
    pd.DataFrame([["Open Time", "Open", "High",  "Low",  "Close",  "Volume"]]).to_csv(fn, mode = 'a', header = None)
    for i in d:
        i.to_csv(fn, mode = 'a', header = None)#, header = ["Open Time", "Open", "High",  "Low",  "Close",  "Volume"])

#simplified gen_csv function
def gen_csv_year(year, symbol, interval = "1m"):
    gen_csv(datetime(year,1,1) , "{}_{}".format(year, symbol) ,datetime(year + 1,1,1), symbol, interval )
    

#for i in range(2016, 2021):
#    gen_csv_year(i, "ETHUSDT")    


In [5]:
gen_csv_year(2020, "ETHUSDT", interval = "5m")

2020-01-01 00:00:00 2020-01-01 16:40:00
2020-01-01 16:40:00 2020-01-02 09:20:00
2020-01-02 09:20:00 2020-01-03 02:00:00
2020-01-03 02:00:00 2020-01-03 18:40:00
2020-01-03 18:40:00 2020-01-04 11:20:00
2020-01-04 11:20:00 2020-01-05 04:00:00
2020-01-05 04:00:00 2020-01-05 20:40:00
2020-01-05 20:40:00 2020-01-06 13:20:00
2020-01-06 13:20:00 2020-01-07 06:00:00
2020-01-07 06:00:00 2020-01-07 22:40:00
2020-01-07 22:40:00 2020-01-08 15:20:00
2020-01-08 15:20:00 2020-01-09 08:00:00
2020-01-09 08:00:00 2020-01-10 00:40:00
2020-01-10 00:40:00 2020-01-10 17:20:00
2020-01-10 17:20:00 2020-01-11 10:00:00
2020-01-11 10:00:00 2020-01-12 02:40:00
2020-01-12 02:40:00 2020-01-12 19:20:00
2020-01-12 19:20:00 2020-01-13 12:00:00
2020-01-13 12:00:00 2020-01-14 04:40:00
2020-01-14 04:40:00 2020-01-14 21:20:00
2020-01-14 21:20:00 2020-01-15 14:00:00
2020-01-15 14:00:00 2020-01-16 06:40:00
2020-01-16 06:40:00 2020-01-16 23:20:00
2020-01-16 23:20:00 2020-01-17 16:00:00
2020-01-17 16:00:00 2020-01-18 08:40:00


2020-05-22 08:40:00 2020-05-23 01:20:00
2020-05-23 01:20:00 2020-05-23 18:00:00
2020-05-23 18:00:00 2020-05-24 10:40:00
2020-05-24 10:40:00 2020-05-25 03:20:00
2020-05-25 03:20:00 2020-05-25 20:00:00
2020-05-25 20:00:00 2020-05-26 12:40:00
2020-05-26 12:40:00 2020-05-27 05:20:00
2020-05-27 05:20:00 2020-05-27 22:00:00
2020-05-27 22:00:00 2020-05-28 14:40:00
2020-05-28 14:40:00 2020-05-29 07:20:00
2020-05-29 07:20:00 2020-05-30 00:00:00
2020-05-30 00:00:00 2020-05-30 16:40:00
2020-05-30 16:40:00 2020-05-31 09:20:00
2020-05-31 09:20:00 2020-06-01 02:00:00
2020-06-01 02:00:00 2020-06-01 18:40:00
2020-06-01 18:40:00 2020-06-02 11:20:00
2020-06-02 11:20:00 2020-06-03 04:00:00
2020-06-03 04:00:00 2020-06-03 20:40:00
2020-06-03 20:40:00 2020-06-04 13:20:00
2020-06-04 13:20:00 2020-06-05 06:00:00
2020-06-05 06:00:00 2020-06-05 22:40:00
2020-06-05 22:40:00 2020-06-06 15:20:00
2020-06-06 15:20:00 2020-06-07 08:00:00
2020-06-07 08:00:00 2020-06-08 00:40:00
2020-06-08 00:40:00 2020-06-08 17:20:00


KeyError: "None of [Int64Index([0, 1, 2, 3, 4, 5], dtype='int64')] are in the [columns]"

In [4]:
gen_csv_year(2020, "BTCUSDT", interval = "5m")

2020-01-01 00:00:00 2020-01-01 16:40:00
2020-01-01 16:40:00 2020-01-02 09:20:00
2020-01-02 09:20:00 2020-01-03 02:00:00
2020-01-03 02:00:00 2020-01-03 18:40:00
2020-01-03 18:40:00 2020-01-04 11:20:00
2020-01-04 11:20:00 2020-01-05 04:00:00
2020-01-05 04:00:00 2020-01-05 20:40:00
2020-01-05 20:40:00 2020-01-06 13:20:00
2020-01-06 13:20:00 2020-01-07 06:00:00
2020-01-07 06:00:00 2020-01-07 22:40:00
2020-01-07 22:40:00 2020-01-08 15:20:00
2020-01-08 15:20:00 2020-01-09 08:00:00
2020-01-09 08:00:00 2020-01-10 00:40:00
2020-01-10 00:40:00 2020-01-10 17:20:00
2020-01-10 17:20:00 2020-01-11 10:00:00
2020-01-11 10:00:00 2020-01-12 02:40:00
2020-01-12 02:40:00 2020-01-12 19:20:00
2020-01-12 19:20:00 2020-01-13 12:00:00
2020-01-13 12:00:00 2020-01-14 04:40:00
2020-01-14 04:40:00 2020-01-14 21:20:00
2020-01-14 21:20:00 2020-01-15 14:00:00
2020-01-15 14:00:00 2020-01-16 06:40:00
2020-01-16 06:40:00 2020-01-16 23:20:00
2020-01-16 23:20:00 2020-01-17 16:00:00
2020-01-17 16:00:00 2020-01-18 08:40:00


2020-05-22 08:40:00 2020-05-23 01:20:00
2020-05-23 01:20:00 2020-05-23 18:00:00
2020-05-23 18:00:00 2020-05-24 10:40:00
2020-05-24 10:40:00 2020-05-25 03:20:00
2020-05-25 03:20:00 2020-05-25 20:00:00
2020-05-25 20:00:00 2020-05-26 12:40:00
2020-05-26 12:40:00 2020-05-27 05:20:00
2020-05-27 05:20:00 2020-05-27 22:00:00
2020-05-27 22:00:00 2020-05-28 14:40:00
2020-05-28 14:40:00 2020-05-29 07:20:00
2020-05-29 07:20:00 2020-05-30 00:00:00
2020-05-30 00:00:00 2020-05-30 16:40:00
2020-05-30 16:40:00 2020-05-31 09:20:00
2020-05-31 09:20:00 2020-06-01 02:00:00
2020-06-01 02:00:00 2020-06-01 18:40:00
2020-06-01 18:40:00 2020-06-02 11:20:00
2020-06-02 11:20:00 2020-06-03 04:00:00
2020-06-03 04:00:00 2020-06-03 20:40:00
2020-06-03 20:40:00 2020-06-04 13:20:00
2020-06-04 13:20:00 2020-06-05 06:00:00
2020-06-05 06:00:00 2020-06-05 22:40:00
2020-06-05 22:40:00 2020-06-06 15:20:00
2020-06-06 15:20:00 2020-06-07 08:00:00
2020-06-07 08:00:00 2020-06-08 00:40:00
2020-06-08 00:40:00 2020-06-08 17:20:00


KeyError: "None of [Int64Index([0, 1, 2, 3, 4, 5], dtype='int64')] are in the [columns]"