# Binance
### the world's largest cryptocurrency exchange by trading volume


# Upbit
### the largest cryptocurrency exchange in South Korea

In [None]:
## Data Crawling

import requests
import pandas as pd
import time


# For Binance

def get_binance_ohlcv(symbol="BTCUSDT", interval="1d", start="2024-01-01", end="2026-02-18"):
    url = "https://fapi.binance.com/fapi/v1/klines"
    start_ms = int(pd.Timestamp(start).timestamp() * 1000)
    end_ms   = int(pd.Timestamp(end).timestamp() * 1000)
    
    all_data = []
    while start_ms < end_ms:
        params = {"symbol": symbol, "interval": interval,
                  "startTime": start_ms, "endTime": end_ms, "limit": 1000}
        resp = requests.get(url, params=params).json()
        if not resp:
            break
        all_data.extend(resp)
        start_ms = resp[-1][0] + 1  # move to next timestamp for next candle

    df = pd.DataFrame(all_data, columns=[
        "open_time","open","high","low","close","volume",
        "close_time","quote_volume","trades",
        "taker_buy_base","taker_buy_quote","ignore"
    ])
    df["open_time"] = pd.to_datetime(df["open_time"], unit="ms")
    df[["open","high","low","close","volume"]] = df[["open","high","low","close","volume"]].astype(float)
    return df

# For Upbit

def get_upbit_ohlcv(market="KRW-BTC", start="2024-01-01", end="2026-02-18"):
    url = "https://api.upbit.com/v1/candles/days"
    end_dt = pd.Timestamp(end)
    start_dt = pd.Timestamp(start)
    
    all_data = []
    cursor = end_dt

    while cursor > start_dt:
        params = {"market": market, "count": 200, "to": cursor.strftime("%Y-%m-%dT%H:%M:%SZ")} # maximum 200 candles per request
        resp = requests.get(url, params=params).json()
        if not resp:
            break
        all_data.extend(resp)
        cursor = pd.Timestamp(resp[-1]["candle_date_time_utc"]) - pd.Timedelta(days=1)
        time.sleep(0.1)  # for rate limits

    df = pd.DataFrame(all_data)
    df["candle_date_time_utc"] = pd.to_datetime(df["candle_date_time_utc"])
    df = df[df["candle_date_time_utc"] >= start_dt].sort_values("candle_date_time_utc")
    return df



bi_df = get_binance_ohlcv()
up_df = get_upbit_ohlcv()

# Data Cleaning

In [18]:
len(up_df), len(bi_df)

(776, 780)

# Data Split
split = int
