In [2]:
from pybit.unified_trading import HTTP
import pandas as pd
from datetime import datetime, UTC
import time
import os

def download_bybit_klines(
    asset_name: str,
    data_interval_str: str,
    date_range: str,
    data_path: str,
    testnet: bool = False
):
    session = HTTP(testnet=testnet)

    # Interval mapping for Bybit API
    interval_map = {
        '1m':'1',
        '3m':'3',
        '5m':'5',
        '15m': '15',
        '30m': '30',
        '1h': '60',
        '2h': '120',
        '4h': '240',
        '6h': '360',
        '12h': '720',
        '1d': 'D',
        '1w': 'W',
        '1M': 'M',
    }
    if data_interval_str not in interval_map:
        raise ValueError(f"Unsupported interval: {data_interval_str}")
    
    interval = interval_map[data_interval_str]
    limit = 1000

    # Time gap per 1000 candles in milliseconds
    gap_map = {
        '15': 15 * 60 * 1000,
        '30': 30 * 60 * 1000,
        '60': 60 * 60 * 1000,
        '120': 120 * 60 * 1000,
        '240': 240 * 60 * 1000,
        '360': 360 * 60 * 1000,
        '720': 720 * 60 * 1000,
        'D': 1440 * 60 * 1000,
        'W': 7 * 1440 * 60 * 1000,
        'M': 30 * 1440 * 60 * 1000,
    }
    gap = gap_map[interval] * limit

    # Parse date range
    starttime = datetime.strptime(date_range.split('_')[0], "%Y%m%d").replace(tzinfo=UTC)
    endtime = datetime.strptime(date_range.split('_')[1], "%Y%m%d").replace(tzinfo=UTC)
    starttime_ts = int(starttime.timestamp() * 1000)
    endtime_ts = int(endtime.timestamp() * 1000)

    print(f"Fetching data for {asset_name} ({data_interval_str})...")
    price_data = []
    current_endtime = endtime_ts
    first_data_timestamp = None

    while starttime_ts < current_endtime:
        try:
            candle = session.get_kline(
                category="linear",
                symbol=f"{asset_name}USDT",
                interval=interval,
                end=current_endtime,
                limit=limit,
            )
            candle_data = candle["result"]["list"]
            if not candle_data:
                current_endtime -= gap
                continue

            # Capture earliest available timestamp
            if first_data_timestamp is None:
                first_data_timestamp = int(candle_data[-1][0])

            price_data.extend(candle_data)
            current_endtime -= gap
            time.sleep(0.1)
        except Exception as e:
            print(f"Error fetching data for {asset_name}: {e}")
            break

    if not price_data:
        print(f"No data available for {asset_name} between {starttime} and {endtime}")
        return

    # Process and clean data
    df = pd.DataFrame(price_data)
    df.columns = ["time", "open", "high", "low", "close", "volume", "turnover"]
    df["time"] = df["time"].astype(float)
    df = df.drop_duplicates(subset=["time"])
    df["time"] = pd.to_datetime(df["time"], unit="ms", utc=True)
    df = df.sort_values("time")

    actual_start = df["time"].min()

    if actual_start > starttime:
        print(f"No data found for {asset_name} between {starttime} and {actual_start}")

    # Save CSV
    filename = f"bybit_{asset_name.lower()}_{data_interval_str}_{date_range}.csv"
    filepath = os.path.join(data_path, filename)
    df.to_csv(filepath, index=False)
    print(f"Saved to {filepath}")

# Example run: fetch 4-hour BTC candles for the range 20210101_20241231 and
# write the CSV into a local folder.
download_bybit_klines(
    asset_name='BTC',
    # Interval labels checked by the function:
    # 1m, 3m, 5m, 15m, 30m, 1h, 2h, 4h, 6h, 12h, 1d, 1w, 1M
    # (hours are written 1h/2h/..., not 60m/120m).
    data_interval_str='4h',
    # Format: YYYYMMDD_YYYYMMDD (start_end).
    date_range='20210101_20241231',
    data_path='/Users/bryanlew/Document/AlgoCrypto/Backend/get_Data'
)


Fetching data for BTC (4h)...
Saved to /Users/bryanlew/Document/AlgoCrypto/Backend/get_Data/bybit_btc_4h_20210101_20241231.csv


In [3]:
def load_data(asset, path, data_interval_str, date_range):
    """Load the close prices of one asset from a previously saved Bybit CSV.

    The file is read from
    ``{path}/{asset}/bybit_{asset.lower()}_{data_interval_str}_{date_range}.csv``.

    Args:
        asset: Asset symbol, e.g. ``'BTC'``; used as sub-folder name, in the
            file name (lower-cased) and as the name of the price column.
        path: Root directory that contains one sub-folder per asset.
        data_interval_str: Interval label used in the file name, e.g. ``'4h'``.
        date_range: Date range used in the file name, e.g. ``'20210101_20241231'``.

    Returns:
        DataFrame with two columns: ``time`` (parsed to datetimes) and the
        close price under a column named after ``asset``.
    """
    # Use the function's own arguments; the previous body referenced the
    # undefined names `asset1` / `df1`.
    df = pd.read_csv(f"{path}/{asset}/bybit_{asset.lower()}_{data_interval_str}_{date_range}.csv")
    df['time'] = pd.to_datetime(df['time'])
    prices = df[['time', 'close']].rename(columns={'close': asset})
    return prices
