In [3]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [86]:
# To ensure our src module can be found and imported
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
    
import subprocess
import time
import pandas as pd

from datetime import datetime, timedelta
from pathlib import Path

    
from src.data.binance_downloader import download_historical_daily_klines

In [25]:
DATA_DIR = Path.cwd().parent / 'data'
RAW_DATA_DIR = DATA_DIR / 'raw'
PROCESSED_DATA_DIR = DATA_DIR / 'processed'

BINANCE_HISTORICAL_DATA_DIR = RAW_DATA_DIR / 'binance_historical'
BINANCE_HISTORICAL_FILES_DIR = BINANCE_HISTORICAL_DATA_DIR / 'data/spot/daily/klines/BTCUSDT/1m'
BINANCE_HISTORICAL_DF_PATH = PROCESSED_DATA_DIR / 'binance_historical_df.csv'

TRADING_TYPE = 'spot'
TICKER_SYMBOLS = ['BTCUSDT']
INTERVALS = ['1m']
# No available data before 2021-03-01
START_DATE = '2021-03-01'
END_DATE = (datetime.utcnow() - timedelta(days=1) ).strftime('%Y-%m-%d')
# Reference: https://github.com/binance/binance-public-data/tree/master
RAW_DF_HEADERS = ['open_time', 'open', 'high', 'low', 'close', 'volume', 'close_time', 'quote_asset_volume', 'num_trades', 'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore']

# Ensure directories are present
BINANCE_HISTORICAL_DATA_DIR.mkdir(parents=True, exist_ok=True)

In [26]:
download_historical_daily_klines(TRADING_TYPE, 
                                 TICKER_SYMBOLS, 
                                 len(TICKER_SYMBOLS), 
                                 INTERVALS, 
                                 START_DATE, 
                                 END_DATE, 
                                 str(BINANCE_HISTORICAL_DATA_DIR))

Found 1 symbols
[1/1] - start download daily BTCUSDT klines 

file already exists! /Users/jonathanlim/workspace/personal/bitcoin-god/data/raw/binance_historical/data/spot/daily/klines/BTCUSDT/1m/BTCUSDT-1m-2021-03-01.zip

file already exists! /Users/jonathanlim/workspace/personal/bitcoin-god/data/raw/binance_historical/data/spot/daily/klines/BTCUSDT/1m/BTCUSDT-1m-2021-03-02.zip

file already exists! /Users/jonathanlim/workspace/personal/bitcoin-god/data/raw/binance_historical/data/spot/daily/klines/BTCUSDT/1m/BTCUSDT-1m-2021-03-03.zip

file already exists! /Users/jonathanlim/workspace/personal/bitcoin-god/data/raw/binance_historical/data/spot/daily/klines/BTCUSDT/1m/BTCUSDT-1m-2021-03-04.zip

file already exists! /Users/jonathanlim/workspace/personal/bitcoin-god/data/raw/binance_historical/data/spot/daily/klines/BTCUSDT/1m/BTCUSDT-1m-2021-03-05.zip

file already exists! /Users/jonathanlim/workspace/personal/bitcoin-god/data/raw/binance_historical/data/spot/daily/klines/BTCUSDT/1m/BTCUS

In [27]:
# Read all files in BINANCE_HISTORICAL_FILES_DIR
files = sorted([str(path) for path in BINANCE_HISTORICAL_FILES_DIR.glob('**/*') if path.is_file()])

In [28]:
df_list = [pd.read_csv(path, names=RAW_DF_HEADERS) for path in files]
historical_df = pd.concat(df_list, axis=0, ignore_index=True)

In [29]:
historical_df

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volume,num_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore
0,1614556800000,45134.11,45266.77,45130.34,45260.74,72.517978,1614556859999,3.277691e+06,2207,33.689150,1.522869e+06,0
1,1614556860000,45252.67,45362.07,45250.64,45356.00,65.371778,1614556919999,2.961835e+06,2028,32.499895,1.472609e+06,0
2,1614556920000,45356.00,45371.41,45104.36,45128.57,128.114624,1614556979999,5.795551e+06,2706,47.268294,2.138666e+06,0
3,1614556980000,45128.57,45194.65,45020.87,45037.36,59.964922,1614557039999,2.706678e+06,1502,25.519749,1.152231e+06,0
4,1614557040000,45036.62,45107.01,44977.82,45032.48,57.852895,1614557099999,2.605675e+06,1250,22.489983,1.013207e+06,0
...,...,...,...,...,...,...,...,...,...,...,...,...
976841,1673222100000,17071.38,17075.52,17066.03,17072.40,163.906720,1673222159999,2.798155e+06,4434,78.838300,1.345936e+06,0
976842,1673222160000,17071.92,17084.65,17070.80,17081.43,196.225080,1673222219999,3.351317e+06,5390,114.085820,1.948437e+06,0
976843,1673222220000,17081.75,17176.99,17081.31,17116.89,1629.484480,1673222279999,2.791647e+07,24701,1012.595750,1.734438e+07,0
976844,1673222280000,17117.64,17124.17,17102.96,17108.47,383.824150,1673222339999,6.568321e+06,7708,182.448170,3.122237e+06,0


In [30]:
historical_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 976846 entries, 0 to 976845
Data columns (total 12 columns):
 #   Column                        Non-Null Count   Dtype  
---  ------                        --------------   -----  
 0   open_time                     976846 non-null  int64  
 1   open                          976846 non-null  float64
 2   high                          976846 non-null  float64
 3   low                           976846 non-null  float64
 4   close                         976846 non-null  float64
 5   volume                        976846 non-null  float64
 6   close_time                    976846 non-null  int64  
 7   quote_asset_volume            976846 non-null  float64
 8   num_trades                    976846 non-null  int64  
 9   taker_buy_base_asset_volume   976846 non-null  float64
 10  taker_buy_quote_asset_volume  976846 non-null  float64
 11  ignore                        976846 non-null  int64  
dtypes: float64(8), int64(4)
memory usage: 89.4 M

In [31]:
historical_df.to_csv(BINANCE_HISTORICAL_DF_PATH, index=False)

## Get today's klines using the real-time API

In [None]:
from binance.spot import Spot

In [47]:
client = Spot()

# Get server timestamp
print(client.time())



{'serverTime': 1673279038648}


In [120]:
historical_end_time = int(historical_df.iloc[-1]['close_time'])
start_time = historical_end_time + 1

# Get all klines today up to the latest recorded minute
realtime_klines = []
# API limits max 1000 klines in response
new_klines = client.klines("BTCUSDT", "1m", startTime=start_time, limit=1000)
realtime_klines.extend(new_klines)
start_time = new_klines[-1][6] + 1

# To handle the case where more than 1000 minutes have elapsed in the day already
while len(new_klines) > 0:
    new_klines = client.klines("BTCUSDT", "1m", startTime=start_time, limit=1000)
    realtime_klines.extend(new_klines)

In [121]:
curr_unix_time = time.mktime(datetime.now().timetuple())
print(curr_unix_time)

1673281040.0


In [122]:
realtime_klines[-1]

[1673281020000,
 '17297.48000000',
 '17298.49000000',
 '17290.88000000',
 '17292.10000000',
 '122.19766000',
 1673281079999,
 '2113366.11614120',
 2470,
 '62.22869000',
 '1076208.36593120',
 '0']

In [124]:
realtime_df = pd.DataFrame(realtime_klines, columns=RAW_DF_HEADERS)
realtime_df

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volume,num_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore
0,1673222400000,17127.83000000,17147.75000000,17123.57000000,17134.66000000,538.76225000,1673222459999,9232125.85928820,11008,250.88476000,4299331.60555850,0
1,1673222460000,17134.66000000,17145.43000000,17118.95000000,17132.90000000,418.25480000,1673222519999,7164590.43544050,9139,199.01358000,3409198.81366610,0
2,1673222520000,17132.90000000,17134.81000000,17120.00000000,17129.40000000,408.37115000,1673222579999,6993937.35815930,8807,188.19948000,3223299.54649900,0
3,1673222580000,17128.67000000,17132.00000000,17117.14000000,17117.50000000,263.82238000,1673222639999,4518066.11394660,7125,115.42370000,1976778.81334990,0
4,1673222640000,17117.50000000,17135.24000000,17114.86000000,17129.38000000,343.36389000,1673222699999,5880991.03775770,7730,192.14734000,3291164.68970630,0
...,...,...,...,...,...,...,...,...,...,...,...,...
973,1673280780000,17292.56000000,17297.80000000,17291.15000000,17295.14000000,97.07683000,1673280839999,1678917.76839170,3429,47.58820000,823034.27878240,0
974,1673280840000,17295.14000000,17303.45000000,17294.40000000,17302.27000000,182.23811000,1673280899999,3152590.40766530,4606,96.76949000,1674062.77487550,0
975,1673280900000,17302.68000000,17313.43000000,17301.91000000,17307.98000000,282.83259000,1673280959999,4895348.68009700,6390,161.05617000,2787652.42500420,0
976,1673280960000,17307.98000000,17319.30000000,17297.30000000,17297.30000000,294.21089000,1673281019999,5092386.90097590,7240,142.62031000,2468669.48594240,0
