## Download OHLCV data

In [1]:
import ccxt
import pandas as pd
import numpy as np

pd.set_option('display.float_format', lambda x: '%.4f' % x)

In [2]:
# Initialize the exchange
binance = ccxt.binance()

In [3]:
# Define the symbols and timeframe
symbols = ['BTC/USDT', 'ETH/USDT', 'XRP/USDT', 'DOGE/USDT', 'SOL/USDT']
timeframe = '1h'  # 1-hour candles
limit = 1000 # Number of candles to fetch per request
start_date = '2023-01-01T00:00:00Z'

In [4]:
def fetch_data(symbol, timeframe, start_date, limit):
    since = binance.parse8601(start_date)
    symbol_ohlcv = []
    while True:
        ohlcv = binance.fetch_ohlcv(symbol, timeframe, since, limit)
        if len(ohlcv) == 0:
            break
        symbol_ohlcv.extend(ohlcv)
        since = ohlcv[-1][0] + 1
    data = np.array(symbol_ohlcv, dtype=object)
    return np.insert(data, 1, symbol, axis=1)

In [None]:
 # Fetch the data
all_ohlcv = []
for symbol in symbols:
    data = fetch_data(symbol, timeframe, start_date, limit)
    all_ohlcv.append(data)
    print(f"Fetched {len(data)} candles for {symbol}")

df = pd.DataFrame(np.concatenate(all_ohlcv), columns=['timestamp', 'symbol', 'open', 'high', 'low', 'close', 'volume'])

In [None]:
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
df.set_index('timestamp', inplace=True)
df.sort_index(inplace=True)

print(f"Fetched {len(df)} candles")

In [None]:
df.head(5)

In [None]:
df.to_csv('data/ohlcv.csv.gz', compression='gzip')

In [7]:
# slippage data - just a small ammount for now
slippage_start_date = '2025-07-01T00:01:00Z'

 # Fetch the data
all_slippage = []
for symbol in symbols:
    data = fetch_data(symbol, timeframe, slippage_start_date, limit)
    all_slippage.append(data)
    print(f"Fetched {len(data)} candles for {symbol}")

slippage_df = pd.DataFrame(np.concatenate(all_slippage), columns=['timestamp', 'symbol', 'open', 'high', 'low', 'close', 'volume'])
slippage_df['timestamp'] = pd.to_datetime(slippage_df['timestamp'], unit='ms')
slippage_df.set_index('timestamp', inplace=True)
slippage_df.sort_index(inplace=True)
slippage_df.head(5)

Fetched 475 candles for BTC/USDT
Fetched 475 candles for ETH/USDT
Fetched 475 candles for XRP/USDT
Fetched 475 candles for DOGE/USDT
Fetched 475 candles for SOL/USDT


Unnamed: 0_level_0,symbol,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2025-07-01 01:00:00,BTC/USDT,107377.03,107540.0,107171.42,107220.0,219.6465
2025-07-01 01:00:00,ETH/USDT,2493.35,2500.8,2486.83,2488.78,7841.1991
2025-07-01 01:00:00,SOL/USDT,154.76,155.22,154.3,154.39,81261.533
2025-07-01 01:00:00,DOGE/USDT,0.1654,0.1663,0.165,0.1651,19300250.0
2025-07-01 01:00:00,XRP/USDT,2.2384,2.2537,2.2277,2.2305,4630010.9


In [8]:
slippage_df.to_csv('data/ohlcv_slippage.csv.gz', compression='gzip')