In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

In [2]:
%matplotlib inline
sns.set_style(style='whitegrid')

In [3]:
from keys import keys
from binance.client import Client

In [4]:
client = Client(api_key=keys.apiKey, api_secret=keys.secretKey)

In [5]:
"""
The various columns used by Binance
taken from:
https://python-binance.readthedocs.io/en/latest/binance.html#module-binance.client
"""

columns = ['Open Time', 'Open', 'High', 'Low', 'Close',
            'Volume', 'Close Time', 'Quote asset volume', 
            'n_trades', 'Taker buy base asset volume', 
            'Taker buy quote asset volume', 'Ignore']

In [180]:
"""
DOWNLOADS DATA FROM BINANCE IN BATCHES OF 500 items

INPUTS:
    n_iterations: how many batches of 500 items to download
    
    currency: one of the varios currencies, examples: 
                'BTCUSDT', 'LTCUSDT' ...etc
    interval: dictionary, example: interval_5min = { 
                                        'interval' : Client.KLINE_INTERVAL_5MINUTE,
                                        'interval_diff' : 300000 
                                    }
            interval_diff: the difference in millisec of of new_data[1][0] - new_data[0][0]
            (the difference in binance timeframes of misurations)
"""
def get_by_intervals(n_iterations, currencies, interval):
    
    #columns template
    columns = ['Open Time', 'Open', 'High', 'Low', 'Close',
            'Volume', 'Close Time', 'Quote asset volume', 
            'n_trades', 'Taker buy base asset volume', 
            'Taker buy quote asset volume', 'Ignore']
    
    #most recent timestamp to start downloading from
    starting_point = client.get_klines(symbol=currencies[0], 
                                  interval=interval['interval'],
                                  limit=1
                                )[-1][0]
    
    main_df = pd.DataFrame()
    
    
    for currency in currencies:
        currency_cols = []
        for col in columns:
            currency_cols.append(currency+' '+col)
        
        specific_currency_df = pd.DataFrame(columns=currency_cols)
        
        #downloading n iterations of a specific currency
        for i in range(0,n_iterations):
            if specific_currency_df.empty:
                new_data = client.get_klines(symbol=currency, 
                                  interval=interval['interval'],
                                  endTime = starting_point
                                )
            else:
                end_time = specific_currency_df[currency+' Open Time'].iloc[0] - interval['interval_diff']
                new_data = client.get_klines(symbol=currency, 
                                  interval=interval['interval'],
                                  endTime=end_time
                                 )

            new_data_df = pd.DataFrame(new_data, columns=currency_cols)
            specific_currency_df = pd.concat([new_data_df,specific_currency_df])
        
        main_df = pd.concat([main_df, specific_currency_df],axis=1)
    return main_df

In [181]:
"""
interval_diff: new_data[1][0] - new_data[0][0]
"""

interval_1min = { 'interval' : Client.KLINE_INTERVAL_1MINUTE,
                     'interval_diff': 60000 }

interval_5min = { 'interval' : Client.KLINE_INTERVAL_5MINUTE,
                    'interval_diff': 300000 }

interval_15min = { 'interval' : Client.KLINE_INTERVAL_15MINUTE,
                     'interval_diff': 900000 }

In [182]:
"""
USED TO TEST IF TIMESTAMPS ARE ALIGNED AND THERE ARE NO SUDDEN JUMPS

if it doesn't output anything we're good
"""

def test_time_skip(i_init,i_end,df):
    for i in range(i_init,i_end,currencies):
        if (df['Open Time'].iloc[i+1] - df['Open Time'].iloc[i] != 300000):
            print('FUUUCK',(df['Open Time'].iloc[i+1] - df['Open Time'].iloc[i]), 
                df['Open Time'].iloc[i+1], df['Open Time'].iloc[i],i)

In [183]:
ltc_test = get_by_intervals(4, ['LTCUSDT','BTCEUR'], interval_15min)

In [184]:
ltc_test.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2000 entries, 0 to 499
Data columns (total 24 columns):
 #   Column                                Non-Null Count  Dtype 
---  ------                                --------------  ----- 
 0   LTCUSDT Open Time                     2000 non-null   object
 1   LTCUSDT Open                          2000 non-null   object
 2   LTCUSDT High                          2000 non-null   object
 3   LTCUSDT Low                           2000 non-null   object
 4   LTCUSDT Close                         2000 non-null   object
 5   LTCUSDT Volume                        2000 non-null   object
 6   LTCUSDT Close Time                    2000 non-null   object
 7   LTCUSDT Quote asset volume            2000 non-null   object
 8   LTCUSDT n_trades                      2000 non-null   object
 9   LTCUSDT Taker buy base asset volume   2000 non-null   object
 10  LTCUSDT Taker buy quote asset volume  2000 non-null   object
 11  LTCUSDT Ignore                 

In [185]:
print(ltc_test['LTCUSDT Open Time'][0])
print(ltc_test['BTCEUR Open Time'][0])

0    1581257700000
0    1581707700000
0    1582178400000
0    1582628400000
Name: LTCUSDT Open Time, dtype: object
0    1581257700000
0    1581707700000
0    1582178400000
0    1582628400000
Name: BTCEUR Open Time, dtype: object


In [186]:
ltc_test[ ltc_test['BTCEUR Open Time'].duplicated()]

Unnamed: 0,LTCUSDT Open Time,LTCUSDT Open,LTCUSDT High,LTCUSDT Low,LTCUSDT Close,LTCUSDT Volume,LTCUSDT Close Time,LTCUSDT Quote asset volume,LTCUSDT n_trades,LTCUSDT Taker buy base asset volume,...,BTCEUR High,BTCEUR Low,BTCEUR Close,BTCEUR Volume,BTCEUR Close Time,BTCEUR Quote asset volume,BTCEUR n_trades,BTCEUR Taker buy base asset volume,BTCEUR Taker buy quote asset volume,BTCEUR Ignore


In [12]:
#test_time_skip(0,999,ltc_test)

In [13]:
ltc_test[['Open','Close', 'High','Low', 'Volume', 'Quote asset volume', 
            'Taker buy base asset volume', 'Taker buy quote asset volume'
           ]] = ltc_test.columns.drop(''.astype(float)

KeyError: "None of [Index(['Open', 'Close', 'High', 'Low', 'Volume', 'Quote asset volume',\n       'Taker buy base asset volume', 'Taker buy quote asset volume'],\n      dtype='object')] are in the [columns]"

In [None]:
ltc_test['Close Time'] = ltc_test['Close Time'].apply(
    lambda x: datetime.fromtimestamp(int(x/1000)))
ltc_test['Open Time'] = ltc_test['Open Time'].apply(
    lambda x: datetime.fromtimestamp(int(x/1000)))

In [None]:
ltc_test.info()

In [None]:
print('Data timeframe:')
print( 'From:  ',ltc_test['Close Time'].max())
print( 'To     ',ltc_test['Close Time'].min())

In [33]:
plt.figure(figsize=(16,8))
ltc_test['Close'].iloc[:500].plot()

KeyError: 'Close'

<Figure size 1152x576 with 0 Axes>

In [172]:
new_data = client.get_klines(symbol='BTCEUR', 
    interval=Client.KLINE_INTERVAL_1MINUTE )

In [173]:
new_data[0]

[1583048160000,
 '7750.00000000',
 '7750.00000000',
 '7750.00000000',
 '7750.00000000',
 '0.00000000',
 1583048219999,
 '0.00000000',
 0,
 '0.00000000',
 '0.00000000',
 '0']

In [174]:
second_new = client.get_klines(symbol='LTCUSDT', 
    interval=Client.KLINE_INTERVAL_1MINUTE,
                              endTime=new_data[-1][0])

In [175]:
second_new[0]

[1583048160000,
 '58.50000000',
 '58.67000000',
 '58.49000000',
 '58.64000000',
 '436.30346000',
 1583048219999,
 '25564.56612150',
 78,
 '233.54042000',
 '13685.14286370',
 '0']