In [1]:
import os
from datetime import date, datetime, timezone

import pandas as pd
import requests
from tqdm import tqdm

In [2]:
def convert_to_binance_timestamp(dt):
    """Convert a datetime into an integer number of milliseconds since the Unix epoch.

    Parameters
    ----------
    dt : datetime.datetime
        Moment to convert. A timezone-aware value is converted using its own
        offset. A naive value (``tzinfo`` is None) is interpreted as UTC.

    Returns
    -------
    int
        Milliseconds since 1970-01-01 00:00:00 UTC, truncated towards zero.
    """
    # ``datetime.timestamp()`` treats a naive datetime as machine-local time,
    # which would shift the result by the local UTC offset. The rest of this
    # notebook decodes millisecond timestamps with ``pd.to_datetime(unit='ms')``
    # (UTC-based, naive) and compares them with the same naive datetimes, so
    # naive values are pinned to UTC here to keep both directions consistent.
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return int(dt.timestamp() * 1000)

In [3]:
def create_directory(directory):
    """Create ``directory`` (including missing parent directories) if needed.

    Calling this for a directory that already exists is a no-op.

    Parameters
    ----------
    directory : str or os.PathLike
        Path of the directory to create.

    Raises
    ------
    FileExistsError
        If the path already exists but is not a directory.
    """
    # exist_ok=True replaces the separate "exists?" check, so there is no
    # window in which another process can create the directory between the
    # check and the creation.
    os.makedirs(directory, exist_ok=True)

In [4]:
def get_binance_historical_data(symbol, interval, startTime, endTime=None, limit=1000, timeout=30):
    """Fetch one page of klines (candlesticks) from the Binance REST API.

    Parameters
    ----------
    symbol : str
        Trading pair, e.g. ``'BTCUSDT'``.
    interval : str
        Kline interval, e.g. ``'1h'`` for one hour.
    startTime : int
        Lower bound of the requested window, in milliseconds since the epoch.
    endTime : int, optional
        Upper bound of the requested window, in milliseconds since the epoch.
        When None the parameter is left out of the request (requests omits
        query parameters whose value is None).
    limit : int, default 1000
        Maximum number of klines to retrieve in this request.
    timeout : float, default 30
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        One row per kline with the raw API fields plus a ``timestamp`` column
        holding ``timestamp_ms`` converted to a datetime.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status (e.g. unknown symbol or
        rate limiting).
    requests.RequestException
        On connection problems or timeouts.
    """
    base_url = "https://api.binance.com/api/v3/klines"
    columns = [
        'timestamp_ms', 'open', 'high', 'low', 'close', 'volume',
        'close_time', 'quote_asset_volume', 'number_of_trades',
        'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore',
    ]

    # Parameters for the API request
    params = {
        'symbol': symbol,
        'interval': interval,
        'startTime': startTime,
        'endTime': endTime,
        'limit': limit,
    }

    response = requests.get(base_url, params=params, timeout=timeout)
    # Fail here with the HTTP error instead of letting an error payload
    # (a JSON object, not a list of klines) reach the DataFrame constructor.
    response.raise_for_status()
    data = response.json()

    df = pd.DataFrame(data, columns=columns)

    # Convert timestamps to human-readable date format
    df['timestamp'] = pd.to_datetime(df['timestamp_ms'], unit='ms')
    return df

In [6]:
def make_historical_data_csv(interval:str, symbol:str='BTCUSDT', datetime_start=datetime(2023,1,1), datetime_end=datetime(2024,3,7)):
    """Download klines for ``symbol`` over a period and save them as a CSV file.

    The data is fetched page by page with ``get_binance_historical_data``,
    trimmed to rows whose ``timestamp`` is not after ``datetime_end`` and
    written to ``./data/binance/<period>/<symbol>_<interval>_<period>.csv``,
    where ``<period>`` is built from the year/month/day of both bounds.

    Parameters
    ----------
    interval : str
        Kline interval passed through to the API, e.g. ``'1d'`` or ``'1h'``.
    symbol : str, default 'BTCUSDT'
        Trading pair to download.
    datetime_start : datetime.datetime
        Start of the period.
    datetime_end : datetime.datetime
        End of the period (inclusive).

    Returns
    -------
    pandas.DataFrame
        The rows that were written to the CSV file. Empty (header-only CSV)
        when the API returns no klines for the period.
    """
    startTime = convert_to_binance_timestamp(datetime_start)
    endTime = convert_to_binance_timestamp(datetime_end)

    # The first page is bounded by endTime; follow-up pages are requested
    # without an upper bound and trimmed by the filter further down.
    pages = [get_binance_historical_data(symbol, interval, startTime, endTime)]

    # An empty page means the API has nothing newer (for instance when
    # datetime_end lies in the future), so stop rather than re-requesting the
    # same window forever.
    while not pages[-1].empty:
        last_time = int(pages[-1]['timestamp_ms'].iloc[-1])
        if pd.to_datetime(last_time, unit='ms') >= datetime_end:
            break
        # Resume 1 ms after the last open time received. Unlike adding the gap
        # between the last two rows, this needs no second row and cannot jump
        # over a candle when spacing is irregular (e.g. month-long intervals).
        pages.append(get_binance_historical_data(symbol, interval, last_time + 1))

    # Concatenate once instead of growing a frame inside the loop; empty pages
    # are dropped so they cannot influence the resulting dtypes.
    filled_pages = [page for page in pages if not page.empty] or pages[:1]
    historical_data_df = pd.concat(filled_pages, ignore_index=True)
    historical_data_df = historical_data_df[historical_data_df['timestamp'] <= datetime_end]

    periodo = f"{datetime_start.year}.{datetime_start.month}.{datetime_start.day}_{datetime_end.year}.{datetime_end.month}.{datetime_end.day}"
    new_dir = os.path.join('.', 'data', 'binance', periodo)
    create_directory(new_dir)
    csv_path = os.path.join(new_dir, f'{symbol}_{interval}_{periodo}.csv')
    historical_data_df.to_csv(csv_path, index=False)
    return historical_data_df

In [7]:
# Download one CSV per kline interval for BTCUSDT over a fixed period.
# A wider alternative list: ['1d', '12h', '6h', '1h', '30m', '15m']
intervals = ['1d', '12h', '1h']
dt_start, dt_end = datetime(2021,5,1), datetime(2024,6,23)
for interval in tqdm(intervals):
    # The returned DataFrame is discarded; only the CSV written to disk is kept.
    make_historical_data_csv(
        interval,
        symbol='BTCUSDT',
        datetime_start=dt_start,
        datetime_end=dt_end,
    )

100%|██████████| 3/3 [00:22<00:00,  7.40s/it]
