In [11]:
import datetime
import hashlib
import hmac
import os
import time
from urllib.parse import urljoin, urlencode

import numpy as np
import pandas as pd
import requests

# Credentials are read from the environment so real keys never have to be
# typed into (and saved with) the notebook. When the variables are not set,
# the original placeholder strings are used.
apikey = os.environ.get('BINANCE_API_KEY', 'add apikey')
secret = os.environ.get('BINANCE_API_SECRET', 'add secret key')

# Base URL that every endpoint path is appended to
binance = 'https://api.binance.com'

# test = requests.get("https://api.binance.com/api/v1/ping")
# servertime = requests.get("https://api.binance.com/api/v1/time")

# Header that carries the API key
headers = {
    'X-MBX-APIKEY': apikey
}

# Get timestamp
def timestamp():
    """
    Returns the current time as an integer number of
    milliseconds since the Unix epoch
    """
    now_seconds = time.time()

    return int(now_seconds * 1000)

def date_to_timestamp(date_string='01/01/2019'):
    """
    Parses a 'dd/mm/YYYY' date string and returns the matching
    timestamp in seconds (float). The parsed datetime is naive, so
    it is interpreted in the local timezone of the machine.
    """
    parsed = datetime.datetime.strptime(date_string, "%d/%m/%Y")

    return parsed.timestamp()

def signature(timestamp, **params):
    """
    Creates the signature needed for some requests to the
    binance API
    Args:
        timestamp: int. request time in milliseconds
        **params: the other query parameters of the request
    Returns:
        dict. the given params plus 'timestamp' and 'signature', where
        'signature' is the hex HMAC-SHA256 of the url-encoded params
        (timestamp included) keyed with the module-level `secret`
    """
    # The timestamp must be part of the signed query string; it used to be
    # accepted and then dropped, so it was neither signed nor returned.
    params['timestamp'] = timestamp

    querystring = urlencode(params)
    digest = hmac.new(secret.encode('utf-8'), querystring.encode('utf-8'), hashlib.sha256)

    # Added last: the signature itself is not part of the signed string
    params['signature'] = digest.hexdigest()

    return params

def get_price(headers, symbol='BTCUSDT'):
    """
    Requests the latest price of a symbol from the ticker
    endpoint and returns the decoded json response
    """
    url = binance + '/api/v3/ticker/price'
    response = requests.get(url, headers=headers, params={'symbol': symbol})

    return response.json()

# Print the ticker response for the default symbol (BTCUSDT)
print(get_price(headers))

{'symbol': 'BTCUSDT', 'price': '31829.81000000'}


# Get prices

We can use the Binance API to get the price of a symbol. For the historical price of Bitcoin, the earliest data we can get is from 2017.

### Meaning of the JSON response from Binance

```python
[
  [
    1499040000000,      // Open time
    "0.01634790",       // Open
    "0.80000000",       // High
    "0.01575800",       // Low
    "0.01577100",       // Close
    "148976.11427815",  // Volume
    1499644799999,      // Close time
    "2434.19055334",    // Quote asset volume
    308,                // Number of trades
    "1756.87402397",    // Taker buy base asset volume
    "28.46694368",      // Taker buy quote asset volume
    "17928899.62484339" // Ignore.
  ]
]
```

In [12]:

# First year to download. It is defined here because start_time below needs
# it, and the later cell that also sets `start` has not run yet when the
# notebook is executed from top to bottom.
start = '2017'

# Binance timestamps are in milliseconds, so the timestamp in seconds is multiplied by 1000
start_time = int(date_to_timestamp(f'01/01/{start}') * 1000)

def get_candles(symbol='BTCUSDT', limit=1000, interval='4h', start_time=None, end_time=None):
    """
    Requests the candles of a symbol from the klines endpoint and
    returns the decoded json response. The API returns at most
    1000 candles per request.
    Args:
        symbol: str. a pair of currencies
        limit: int. max 1000, the binance default is 500
        interval: str. the candle's interval, first the number then
            the letter m -> minutes; h -> hours; d -> days; w -> weeks; M -> months
        start_time: int. timestamp in milliseconds
        end_time: int. timestamp in milliseconds
    """
    query = {
        'symbol': symbol,
        'limit': limit,
        'interval': interval,
        'startTime': start_time,
        'endTime': end_time,
    }
    response = requests.get(binance + '/api/v3/klines', params=query)

    return response.json()

# Function that gets the candles from a specific day until the present, with a specific interval
def get_all_candles(start_time, end_time=None, interval='4h', symbol='BTCUSDT'):
    """
    Gets all the candles starting from a specific time until
    a specific time. If end_time is None, it will get
    the candles from the start_time until present
    Args:
        start_time: int. timestamp in milliseconds of the first candle
        end_time: int. timestamp in milliseconds, None -> current time
        interval: str. the candle's interval, see get_candles
        symbol: str. a pair of currencies
    Returns:
        list. one item per request, each item being the list of
        candles returned by that request
    Raises:
        RuntimeError: if a response is not a list of candles
            (for example an error payload from the API)
    """
    all_candles = []

    if end_time is None:
        # current timestamp
        end_time = timestamp()

    # A margin of 2 min (120000 ms) is added to the start time: while the
    # end_time is still higher there is more data to retrieve, otherwise
    # the requested end (by default the current date and hour) was reached.
    while end_time > (start_time + 120000):
        # end_time is forwarded so the last request does not return
        # candles that open after the requested end.
        candles = get_candles(symbol=symbol, interval=interval,
                              start_time=start_time, end_time=end_time)

        # Anything other than a list would fail further down with a confusing
        # KeyError, so fail here with the response in the message.
        if not isinstance(candles, list):
            raise RuntimeError(f'unexpected response from binance: {candles}')

        # Nothing left in the requested range: stop instead of indexing
        # into an empty batch.
        if not candles:
            break

        all_candles.append(candles)

        # update the start_time: 1 millisecond after the close time
        # (index 6) of the last candle received.
        start_time = candles[-1][6] + 1

    return all_candles


# Format and save


In [13]:
def binance_fromtimestamp(binance_time):
    """
    Turns a binance timestamp (milliseconds) into a
    naive datetime in the local timezone
    """
    seconds = binance_time / 1000

    return datetime.datetime.fromtimestamp(seconds)

def candles2dataframe(candles):
    """
    Converts the batches of candles to a single pandas
    DataFrame
    Args:
        candles: list. batches of candles, each batch being a list
            of candles (the format returned by get_all_candles)
    Returns:
        DataFrame with one row per candle, in the order received,
        and a fresh 0..n-1 index
    """
    # Use the argument (the global `all_candles` was read before, so the
    # parameter was ignored) and build the frame once instead of
    # concatenating batch by batch.
    rows = [candle for batch in candles for candle in batch]

    return pd.DataFrame(rows)

def format_df(df):
    """
    Names the columns of a DataFrame of candles, converts the
    open and close times to dates and drops the 'ignore' column
    Args:
        df: DataFrame. 12 columns in the order of the binance
            candles response
    Returns:
        a new formatted DataFrame, the one received is not modified
    """
    # These are the column names from binance api data
    columns = ['open_time', 'open', 'high', 'low', 'close',
               'volume', 'close_time', 'quote_asset_volume', 'number_of_trades',
               'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore']

    # Work on a copy: renaming and dropping columns on the caller's frame
    # made a second call on the same frame fail.
    df = df.copy()
    df.columns = columns

    # Change the timestamp values to real dates.
    df['open_time'] = df.open_time.apply(binance_fromtimestamp)
    df['close_time'] = df.close_time.apply(binance_fromtimestamp)

    # Dropping irrelevant columns
    return df.drop(columns=['ignore'])

In [14]:
# Change the working directory; the CSV files below are written with relative paths.
# NOTE(review): absolute path, presumably needs Google Drive mounted at /content/drive; confirm.
%cd /content/drive/MyDrive/Colab Notebooks/datasets/bitcoin

/content/drive/MyDrive/Colab Notebooks/datasets/bitcoin


In [16]:
start = '2017'
# NOTE: `end` is only used in the file name. get_all_candles is called without
# an end_time, so the candles go from `start` until the current time.
end = '2020'
intervals = ['1m', '5m', '15m', '30m', '1h', '2h', '4h', '1d', '1w']
# NOTE: `symbol` is not passed on; the download uses the default symbol of get_candles.
symbol = 'BTCUSDT'

# Derive the first timestamp (milliseconds) from `start` in this same cell, so
# it always matches the value above instead of whatever an earlier cell computed.
start_time = int(date_to_timestamp(f'01/01/{start}') * 1000)

for interval in intervals:
    all_candles = get_all_candles(start_time, interval=interval)
    df = candles2dataframe(all_candles)
    df = format_df(df)
    # Save file
    df.to_csv(f'btc_{interval}_{start}_{end}.csv', index=False)
    print(f'saved {interval} df')
    # Pause between intervals before the next batch of requests
    time.sleep(5)

saved 1m df
saved 5m df
saved 15m df
saved 30m df
saved 1h df
saved 2h df
saved 4h df
saved 1d df
saved 1w df
