In [15]:
import ccxt
import pandas as pd
import numpy as np

pd.set_option('display.float_format', lambda x: '%.4f' % x)

In [16]:
# Initialize the exchange
binance = ccxt.binance()

# Define the symbols and timeframe
symbols = ['BTC/USDT', 'ETH/USDT', 'XRP/USDT', 'DOGE/USDT', 'SOL/USDT']
timeframe = '1h'  # 1-hour candles
limit = 1000 # Number of candles to fetch per request
start_date = '2024-01-01T00:00:00Z'

def fetch_data(symbol, timeframe, start_date, limit):
    since = binance.parse8601(start_date)
    symbol_ohlcv = []
    while True:
        ohlcv = binance.fetch_ohlcv(symbol, timeframe, since, limit)
        if len(ohlcv) == 0:
            break
        symbol_ohlcv.extend(ohlcv)
        since = ohlcv[-1][0] + 1
    data = np.array(symbol_ohlcv, dtype=object)
    return np.insert(data, 1, symbol, axis=1)
    
# Fetch the data
all_ohlcv = []
for symbol in symbols:
    data = fetch_data(symbol, timeframe, start_date, limit)
    all_ohlcv.append(data)
    print(f"Fetched {len(data)} candles for {symbol}")

df = pd.DataFrame(np.concatenate(all_ohlcv), columns=['timestamp', 'symbol', 'open', 'high', 'low', 'close', 'volume'])

Fetched 13555 candles for BTC/USDT
Fetched 13555 candles for ETH/USDT
Fetched 13555 candles for XRP/USDT
Fetched 13555 candles for DOGE/USDT
Fetched 13555 candles for SOL/USDT


In [17]:
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
df.set_index('timestamp', inplace=True)
df.sort_index(inplace=True)

print(f"Fetched {len(df)} candles")

Fetched 67775 candles


In [18]:
df.head(5)

Unnamed: 0_level_0,symbol,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-01-01,BTC/USDT,42283.58,42554.57,42261.02,42475.23,1271.6811
2024-01-01,XRP/USDT,0.6155,0.6172,0.6146,0.6162,14498728.0
2024-01-01,DOGE/USDT,0.0896,0.09,0.0895,0.0898,17799677.0
2024-01-01,SOL/USDT,101.72,102.79,101.56,101.96,196680.93
2024-01-01,ETH/USDT,2281.87,2297.18,2281.27,2295.51,10771.9183


In [19]:
df.to_csv('data/ohlcv.csv.gz', compression='gzip')