## Download OHLCV data

In [1]:
from pathlib import Path

import ccxt
import numpy as np
import pandas as pd

pd.set_option('display.float_format', lambda x: '%.4f' % x)

In [2]:
# Initialize the exchange
binance = ccxt.binance()

# Define the symbols and timeframe
symbols = ['BTC/USDT', 'ETH/USDT', 'XRP/USDT', 'DOGE/USDT', 'SOL/USDT']
timeframe = '1h'  # 1-hour candles
limit = 1000 # Number of candles to fetch per request
start_date = '2023-01-01T00:00:00Z'

def fetch_data(symbol, timeframe, start_date, limit, exchange=None):
    """Download every OHLCV candle for one symbol from ``start_date`` onward.

    Candles are requested page by page. After each page the cursor moves to
    one past the timestamp of the last candle received, and the loop stops
    when the exchange returns an empty page.

    Args:
        symbol: Market symbol passed to ``fetch_ohlcv`` (e.g. ``'BTC/USDT'``).
        timeframe: Candle interval passed to ``fetch_ohlcv`` (e.g. ``'1h'``).
        start_date: ISO-8601 string, parsed with the exchange's ``parse8601``.
        limit: Maximum number of candles requested per call.
        exchange: Object providing ``parse8601`` and ``fetch_ohlcv``. When
            omitted, the notebook-level ``binance`` client is used.

    Returns:
        A 2-D ``dtype=object`` ndarray with one row per candle: the candle's
        first field (timestamp), then ``symbol``, then the remaining candle
        fields. When no candles are returned at all, an empty array of shape
        ``(0, 7)`` is returned so the result can still be concatenated with
        the arrays of other symbols.
    """
    client = binance if exchange is None else exchange
    since = client.parse8601(start_date)
    symbol_ohlcv = []
    while True:
        ohlcv = client.fetch_ohlcv(symbol, timeframe, since, limit)
        if not ohlcv:
            break
        symbol_ohlcv.extend(ohlcv)
        next_since = ohlcv[-1][0] + 1
        # Stop if the cursor would not move forward; otherwise the same page
        # would be requested forever.
        if next_since <= since:
            break
        since = next_since

    if not symbol_ohlcv:
        # np.array([]) is 1-D, so inserting along axis=1 would raise.
        # Seven columns: six candle fields plus the inserted symbol.
        return np.empty((0, 7), dtype=object)

    data = np.array(symbol_ohlcv, dtype=object)
    # Put the symbol in column 1, right after the timestamp
    return np.insert(data, 1, symbol, axis=1)
    
# Fetch the data: one array of candles per symbol, stacked into a single frame
all_ohlcv = []
for symbol in symbols:
    data = fetch_data(symbol, timeframe, start_date, limit)
    all_ohlcv.append(data)
    print(f"Fetched {len(data)} candles for {symbol}")

df = pd.DataFrame(np.concatenate(all_ohlcv), columns=['timestamp', 'symbol', 'open', 'high', 'low', 'close', 'volume'])

# The stacked array is dtype=object (it mixes the symbol strings with the
# numbers), so every column of the frame starts out as object dtype. Convert
# the price and volume columns to real numeric dtypes so downstream numeric
# operations work on them.
for column in ['open', 'high', 'low', 'close', 'volume']:
    df[column] = pd.to_numeric(df[column])

Fetched 22331 candles for BTC/USDT
Fetched 22331 candles for ETH/USDT
Fetched 22331 candles for XRP/USDT
Fetched 22331 candles for DOGE/USDT
Fetched 22331 candles for SOL/USDT


In [3]:
# Turn the epoch-millisecond 'timestamp' column into a sorted DatetimeIndex.
# The guard makes this cell safe to re-run: once the column has been moved
# into the index it no longer exists, and an unguarded second run would raise
# KeyError.
if 'timestamp' in df.columns:
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    df = df.set_index('timestamp')
df = df.sort_index()

print(f"Fetched {len(df)} candles")

Fetched 111655 candles


In [4]:
# Preview the first five rows (five is the default for head())
df.head()

Unnamed: 0_level_0,symbol,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-01-01,BTC/USDT,16541.77,16545.7,16508.39,16529.67,4364.8357
2023-01-01,SOL/USDT,9.97,10.02,9.93,9.99,126479.67
2023-01-01,ETH/USDT,1196.13,1196.7,1192.72,1194.09,5889.384
2023-01-01,DOGE/USDT,0.0702,0.0703,0.0694,0.0698,40611931.0
2023-01-01,XRP/USDT,0.3389,0.339,0.3377,0.3385,11231966.0


In [5]:
# Write the combined candles to a gzip-compressed CSV. The target directory is
# created first because to_csv does not create missing directories and would
# fail on a fresh checkout without 'data/'.
output_path = Path('data/ohlcv.csv.gz')
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, compression='gzip')