In [1]:
# Packages
import time
import json
import talib as ta
import requests
import pandas as pd
import numpy as np
import datetime
from urllib.parse import urljoin, urlencode
from dateutil import parser


# Obtain the exchange info

In [2]:
# Obtain the exchange information firstly
def exchangeInfo():
    """Download Binance exchange metadata and return the symbol table.

    Returns:
        pandas.DataFrame built from the ``symbols`` entry of the
        ``/api/v3/exchangeInfo`` JSON response.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if no answer arrives within the timeout.
    """
    BASE_URL = 'https://api.binance.com'
    PATH = '/api/v3/exchangeInfo'

    url = urljoin(BASE_URL, PATH)

    # A timeout keeps the notebook from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    # Fail with the HTTP error itself instead of a KeyError on 'symbols' below.
    response.raise_for_status()

    payload = response.json()
    return pd.DataFrame(payload['symbols'])

In [3]:
# Fetch the symbol table once; later cells read the 'symbol' column from it.
tokenInfo = exchangeInfo()

# Preview the first rows.
tokenInfo.head()

Unnamed: 0,symbol,status,baseAsset,baseAssetPrecision,quoteAsset,quotePrecision,quoteAssetPrecision,baseCommissionPrecision,quoteCommissionPrecision,orderTypes,icebergAllowed,ocoAllowed,quoteOrderQtyMarketAllowed,isSpotTradingAllowed,isMarginTradingAllowed,filters,permissions
0,ETHBTC,TRADING,ETH,8,BTC,8,8,8,8,"[LIMIT, LIMIT_MAKER, MARKET, STOP_LOSS_LIMIT, ...",True,True,True,True,True,"[{'filterType': 'PRICE_FILTER', 'minPrice': '0...","[SPOT, MARGIN]"
1,LTCBTC,TRADING,LTC,8,BTC,8,8,8,8,"[LIMIT, LIMIT_MAKER, MARKET, STOP_LOSS_LIMIT, ...",True,True,True,True,True,"[{'filterType': 'PRICE_FILTER', 'minPrice': '0...","[SPOT, MARGIN]"
2,BNBBTC,TRADING,BNB,8,BTC,8,8,8,8,"[LIMIT, LIMIT_MAKER, MARKET, STOP_LOSS_LIMIT, ...",True,True,True,True,True,"[{'filterType': 'PRICE_FILTER', 'minPrice': '0...","[SPOT, MARGIN]"
3,NEOBTC,TRADING,NEO,8,BTC,8,8,8,8,"[LIMIT, LIMIT_MAKER, MARKET, STOP_LOSS_LIMIT, ...",True,True,True,True,True,"[{'filterType': 'PRICE_FILTER', 'minPrice': '0...","[SPOT, MARGIN]"
4,QTUMETH,TRADING,QTUM,8,ETH,8,8,8,8,"[LIMIT, LIMIT_MAKER, MARKET, STOP_LOSS_LIMIT, ...",True,True,True,True,False,"[{'filterType': 'PRICE_FILTER', 'minPrice': '0...",[SPOT]


In [4]:
# Optional export of the symbol table to disk (left disabled).
# tokenInfo.to_csv("Exchange_info.csv")

# Example: the symbol stored under index label 0.
tokenInfo['symbol'][0]

'ETHBTC'

# Obtain the historical data of tokens

The Binance exchange provides candlestick data at frequencies ranging from one minute to one month.

In [5]:
# timedelta_fre = ['W','D','hours','m','s','ms']
# Kline interval codes used as the `interval` argument of start_end_time() and klines().
itv = ["1m","3m","5m","15m","30m","1h","2h","4h","6h","8h","12h","1d","3d","1w","1M"]

# Historical token prices are requested by symbol, so keep the symbol column at hand.
smb = tokenInfo['symbol']

In [6]:
def start_end_time(startTime,interval,length = 1000):
    """Return the (start, end) bounds of a window holding `length` candles.

    The klines endpoint caps the number of candles per request, so a long
    history has to be fetched window by window. Given the window start and
    the candle interval, this computes where the window ends.

    Args:
        startTime: window start formatted as "%Y-%m-%d %H:%M:%S". The value is
            parsed as a naive datetime and converted with
            ``datetime.timestamp``, which interprets it in the machine's
            local timezone.
        interval: candle interval such as "1m", "15m", "4h", "3d", "1w" or
            "1M": an integer count followed by a unit suffix
            (m = minute, h = hour, d = day, w = week, M = calendar month).
        length: number of candles the window should span (default 1000).

    Returns:
        Tuple ``(start_ms, end_ms)`` of epoch-millisecond values as strings.

    Raises:
        ValueError: if `interval` is not a positive count plus a known suffix.
    """
    start_dt = datetime.datetime.strptime(startTime, "%Y-%m-%d %H:%M:%S")

    # Minutes covered by one unit of each fixed-length suffix.
    minutes_per_unit = {'m': 1, 'h': 60, 'd': 24 * 60, 'w': 7 * 24 * 60}

    count_text, suffix = interval[:-1], interval[-1:]
    if not count_text.isdigit() or int(count_text) < 1:
        raise ValueError("Unsupported interval: %r" % (interval,))

    # The count matters: 1000 candles of "3m" span 3000 minutes, not 1000.
    steps = int(count_text) * length

    if suffix == 'M':
        # Months have no fixed length, so step through the calendar instead.
        span = pd.DateOffset(months=steps)
    elif suffix in minutes_per_unit:
        span = pd.Timedelta(minutes=steps * minutes_per_unit[suffix])
    else:
        raise ValueError("Unsupported interval: %r" % (interval,))

    end_dt = pd.to_datetime(start_dt) + span

    # Both bounds go through datetime.timestamp so they share one timezone rule.
    start_ms = datetime.datetime.timestamp(start_dt) * 1000
    end_ms = datetime.datetime.timestamp(end_dt) * 1000

    return str(int(start_ms)), str(int(end_ms))

In [7]:
def klines(symbol,startTime,endTime,interval):
    """Fetch historical candlestick (kline) data for one symbol from Binance.

    Args:
        symbol: trading pair, e.g. 'BTCUSDT'.
        startTime: window start sent as the ``startTime`` query parameter
            (epoch milliseconds, as produced by start_end_time()).
        endTime: window end sent as the ``endTime`` query parameter.
        interval: candle interval code, e.g. '1d'.

    Returns:
        pandas.DataFrame with one row per candle and the twelve named columns
        below, with 'Open time' and 'Close time' converted to UTC datetimes.
        Returns None (after printing 'No data') when the response has no rows.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if no answer arrives within the timeout.
        ValueError: if the response body is not a list of candles.
    """
    BASE_URL = 'https://api.binance.com'
    PATH = '/api/v3/klines'

    params = {
        'symbol': symbol,
        'interval': interval,
        'startTime':startTime,
        'endTime':endTime,
        'limit':1000 # 500/1000 is the limit for each request
    }

    url = urljoin(BASE_URL, PATH)

    # A timeout keeps the notebook from hanging forever on a stalled connection.
    response = requests.get(url, params=params, timeout=30)
    # Surface HTTP errors directly rather than as a DataFrame construction error.
    response.raise_for_status()

    rows = response.json()
    if not isinstance(rows, list):
        # Anything other than a list of candles cannot be tabulated below.
        raise ValueError("Unexpected klines response for %s: %r" % (symbol, rows))

    data = pd.DataFrame(rows)

    col = ['Open time','Open','High','Low','Close','Volume','Close time','Quote asset volume','Number of trades',
       'Taker buy base asset volume','Taker buy quote asset volume','Ignore']

    if data.empty:
        print('No data')
        return None

    data.columns = col
    # Convert the millisecond timestamps into timezone-aware datetimes.
    data['Open time'] = pd.to_datetime(data['Open time'],unit='ms',utc=True)
    data['Close time'] = pd.to_datetime(data['Close time'],unit='ms',utc=True)
    return data

# Sample of data

Having built the API helper functions and loaded the exchange information, we can now download data for any token at any supported interval.

For example, we use daily (1d) BTCUSDT candles as our data sample.

Compute features

In [8]:
def max_min_scaler(data):
    """Min-max scale a numeric sequence and return the scaled last value.

    NaN entries are dropped first; the remaining values are mapped with
    ``(x - min) / (max - min)`` and the final element of the result is returned.

    Args:
        data: list, NumPy array or pandas Series of numbers.

    Returns:
        The scaled last non-NaN value, or the string 'NaN' when nothing is
        left after dropping NaNs (kept as a string for backward
        compatibility). When all remaining values are equal the division is
        0/0 and the result is NaN.
    """
    # Coerce to a Series so plain lists and arrays work as well as Series.
    values = pd.Series(data, dtype=float).dropna()
    if values.empty:
        return 'NaN'

    lowest, highest = values.min(), values.max()
    scaled = (values - lowest) / (highest - lowest)
    return scaled.iloc[-1]

In [9]:
def multiple_ta(data, keep_last=900):
    """Compute a table of TA-Lib indicators from a candle DataFrame.

    Args:
        data: DataFrame with 'High', 'Low', 'Close' and 'Volume' columns.
            The columns are cast to float before being handed to TA-Lib.
        keep_last: number of most recent rows to keep (default 900, the value
            previously hard-coded for every indicator). Pass None to keep all
            rows. NOTE(review): trimming presumably drops the indicators'
            NaN warm-up rows — confirm.

    Returns:
        pandas.DataFrame with one column per indicator, in the order listed
        below, and a fresh 0-based index.
    """
    high = data['High'].astype(float)
    low = data['Low'].astype(float)
    close = data['Close'].astype(float)
    volume = data['Volume'].astype(float)

    # Indicators that return several series at once.
    bb_upper, bb_middle, bb_lower = ta.BBANDS(close, timeperiod=5, nbdevup=2, nbdevdn=2, matype=0)
    macd, macd_signal, macd_hist = ta.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9)
    aroon_down, aroon_up = ta.AROON(high, low, timeperiod=14)

    # Insertion order defines the column order of the result.
    indicators = {
        # Overlap Studies Functions
        'BBANDS_upperband': bb_upper,
        'BBANDS_middleband': bb_middle,
        'BBANDS_lowerband': bb_lower,
        'DEMA': ta.DEMA(close, timeperiod=30),
        'EMA': ta.EMA(close, timeperiod=30),
        'HT_TRENDLINE': ta.HT_TRENDLINE(close),
        # Momentum Indicator Functions
        'macd': macd,
        'macdsignal': macd_signal,
        'macdhist': macd_hist,
        'mfi': ta.MFI(high, low, close, volume, timeperiod=14),
        'willr': ta.WILLR(high, low, close, timeperiod=14),
        'ADX': ta.ADX(high, low, close, timeperiod=14),
        'ADXR': ta.ADXR(high, low, close, timeperiod=14),
        'APO': ta.APO(close, fastperiod=12, slowperiod=26, matype=0),
        'aroondown': aroon_down,
        'aroonup': aroon_up,
        'AROONOSC': ta.AROONOSC(high, low, timeperiod=14),
        # Volume Indicator Functions
        'ad': ta.AD(high, low, close, volume),
        'adosc': ta.ADOSC(high, low, close, volume, fastperiod=3, slowperiod=10),
        'obv': ta.OBV(close, volume),
        # Volatility Indicator Functions
        'atr': ta.ATR(high, low, close, timeperiod=14),
        'natr': ta.NATR(high, low, close, timeperiod=14),
        'trange': ta.TRANGE(high, low, close),
    }

    # Use raw values so the columns align by position, not by the input's index.
    ta_df = pd.DataFrame(
        {name: np.asarray(values, dtype=float) for name, values in indicators.items()}
    )

    if keep_last is not None:
        ta_df = ta_df.tail(keep_last)

    return ta_df.reset_index(drop=True)
    
    

In [10]:
# Position of the sample symbol within smb ('BTCUSDT' in the recorded output).
i = 11
smb[i]

'BTCUSDT'

In [11]:
# Interval used for the sample download: the entry at position 11 of itv.
itv[11]

'1d'

In [12]:
symbol = smb[i]
interval = itv[11]

# Download window. start_end_time() also returns an end bound, but this sample
# uses a fixed one, so only the start value is kept.
startTime = start_end_time(startTime='2019-01-01 00:00:00', interval=interval)[0]
endTime = '1632974400000'  # epoch milliseconds; originally annotated as '2021-09-30 00:00:00'

data = klines(symbol=symbol, startTime=startTime, endTime=endTime, interval=interval)

# klines() returns None when the API sends back no rows; stop here with a
# clear message rather than failing in a later cell.
if data is None:
    raise ValueError("No klines returned for %s (%s)" % (symbol, interval))

In [13]:
# Cast columns 1-5 (Open, High, Low, Close, Volume) to float. Assign by column
# label: on recent pandas `data.iloc[:, 1:6] = ...` writes into the existing
# columns in place and can leave them with their original object dtype.
ohlcv_cols = data.columns[1:6]
data[ohlcv_cols] = data[ohlcv_cols].astype(float)

In [20]:
# 30-period DEMA of the close prices, computed on the full (untrimmed) frame.
close = data['Close']
ind_dema = ta.DEMA(close, timeperiod=30)

In [21]:
# Inspect the untrimmed DEMA series; the recorded output starts with NaN rows.
ind_dema

0               NaN
1               NaN
2               NaN
3               NaN
4               NaN
           ...     
995    46523.612572
996    46087.655006
997    45675.022068
998    45362.628073
999    44953.516458
Length: 1000, dtype: float64

In [16]:
# Indicator table for the most recent candles.
ta_df = multiple_ta(data)

# Take the matching last 900 raw candles, renumber them from 0 so they line up
# row-for-row with ta_df, and join the two tables column-wise.
ln_data = pd.concat(
    [data.iloc[-900:].reset_index(drop=True), ta_df],
    axis=1,
)

In [19]:
# DEMA column of the combined table (900 rows in the recorded output).
ln_data['DEMA']

0       4985.048635
1       5019.120125
2       5059.794551
3       5082.407676
4       5120.534844
           ...     
895    46523.612572
896    46087.655006
897    45675.022068
898    45362.628073
899    44953.516458
Name: DEMA, Length: 900, dtype: float64

In [88]:
# Summarise column dtypes and non-null counts of the combined table.
ln_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 900 entries, 0 to 899
Data columns (total 35 columns):
 #   Column                        Non-Null Count  Dtype              
---  ------                        --------------  -----              
 0   Open time                     900 non-null    datetime64[ns, UTC]
 1   Open                          900 non-null    float64            
 2   High                          900 non-null    float64            
 3   Low                           900 non-null    float64            
 4   Close                         900 non-null    float64            
 5   Volume                        900 non-null    float64            
 6   Close time                    900 non-null    datetime64[ns, UTC]
 7   Quote asset volume            900 non-null    object             
 8   Number of trades              900 non-null    int64              
 9   Taker buy base asset volume   900 non-null    object             
 10  Taker buy quote asset volume  900 non-

In [96]:
# Save the combined candles + indicators table as UTF-8 CSV. No `index`
# argument is passed, so pandas' default applies and the row index is written
# as the first column.
ln_data.to_csv('timeseries_data_btcusdt.csv',encoding="utf-8")