In [4]:
# 1. Import
import pandas as pd
import time
from datetime import datetime
from pybit.unified_trading import HTTP

# 2. Config
SYMBOL = 'HYPEUSDT'  # instrument symbol sent to the Bybit kline request
INTERVAL_BYBIT = '5'  # Bybit takes the interval as a numeric string (e.g. '1','3','5','15','60','240' ...); main() also reads it as a number of minutes
START_DATE = '2025/01/01/00/00'  # start of the requested range, formatted 'YYYY/MM/DD/HH/MM'
END_DATE   = '2025/07/30/00/00'  # end of the requested range, same format
SAVE_DIR   = '01.data'  # directory the CSV is written to (created by main() if missing)

# 3. Helper Functions
def to_timestamp(date_str: str) -> int:
    """Convert a 'YYYY/MM/DD/HH/MM' string, interpreted as UTC, to epoch milliseconds.

    The string is pinned to UTC so the result does not depend on the timezone
    of the machine running the script, and so it agrees with the UTC
    conversion done when epoch milliseconds are turned back into datetimes.

    Args:
        date_str: Date/time text in the form '%Y/%m/%d/%H/%M'.

    Returns:
        Milliseconds since 1970-01-01 00:00 UTC.

    Raises:
        ValueError: If ``date_str`` does not match the expected format.
    """
    from datetime import timezone  # local import keeps this helper self-contained

    dt = datetime.strptime(date_str, '%Y/%m/%d/%H/%M').replace(tzinfo=timezone.utc)
    return int(dt.timestamp() * 1000)

def to_datetime(ms: int) -> datetime:
    """Convert epoch milliseconds to a naive datetime expressed in UTC.

    Uses the timezone-aware ``fromtimestamp`` instead of the deprecated
    ``datetime.utcfromtimestamp`` and then drops the tzinfo, so the returned
    value (and its printed form) stays a naive UTC datetime.

    Args:
        ms: Milliseconds since 1970-01-01 00:00 UTC.

    Returns:
        Naive ``datetime`` holding the corresponding UTC wall-clock time.
    """
    from datetime import timezone  # local import keeps this helper self-contained

    return datetime.fromtimestamp(ms / 1000, tz=timezone.utc).replace(tzinfo=None)

# 4. Bybit Kline Fetch (standalone)
def fetch_bybit_kline(symbol: str, interval: str, start_ts: int, end_ts: int) -> pd.DataFrame:
    """
    Download Bybit linear-contract klines for a time range by paging backwards.

    Requests are issued through the pybit Unified Trading HTTP client, starting
    at ``end_ts`` and moving the end cursor to just before the oldest bar of
    each page until ``start_ts`` is passed or the API returns no rows.

    A failed request is retried a few times with a short backoff so a single
    transient error does not silently truncate the dataset. If every attempt
    for a page fails, paging stops and the bars collected so far are returned
    (best effort, no exception is raised).

    Args:
        symbol: Instrument symbol, e.g. 'HYPEUSDT'.
        interval: Bybit interval string, e.g. '5'.
        start_ts: Range start in epoch milliseconds.
        end_ts: Range end in epoch milliseconds.

    Returns:
        DataFrame with columns timestamp, O, H, L, C, V, T (turnover), ordered
        oldest to newest with duplicate timestamps removed. ``timestamp`` is a
        naive pandas datetime derived from epoch milliseconds (i.e. UTC).
        An empty DataFrame is returned when nothing could be fetched.
    """
    page_limit = 1000   # rows requested per call
    max_attempts = 3    # tries per page before giving up on the remaining range

    session = HTTP(testnet=False)
    all_dfs = []
    current_end = end_ts
    step = 1

    # Walk backwards from end_ts; nothing is requested for an inverted range.
    while current_end >= start_ts:
        rows = None  # stays None only if every attempt for this page failed
        for attempt in range(1, max_attempts + 1):
            try:
                response = session.get_kline(
                    category='linear',
                    symbol=symbol,
                    interval=interval,
                    start=start_ts,
                    end=current_end,
                    limit=page_limit,
                )
                # result.list is [[start, O, H, L, C, V, turnover], ...]
                result = response.get('result', {})
                rows = (result.get('list') or []) if result else []
                break
            except Exception as e:
                print(f"[Error] Bybit fetch failed: {e}")
                if attempt < max_attempts:
                    time.sleep(attempt)  # simple linear backoff before retrying

        if rows is None:
            print("[Warn] Giving up after repeated failures; returning the bars fetched so far.")
            break

        if not rows:
            print("[Info] No more Bybit data.")
            break

        all_dfs.append(pd.DataFrame(rows))
        # Take the minimum start time rather than trusting the row order of the page.
        oldest_ms = min(int(row[0]) for row in rows)
        current_end = oldest_ms - 1
        print(f"[Bybit Step {step}] Oldest: {to_datetime(oldest_ms)} Rows: {len(rows)}")
        time.sleep(0.1)  # stay gentle on the API rate limit
        step += 1

    if not all_dfs:
        return pd.DataFrame()

    # Concatenate pages and name the columns.
    master_df = pd.concat(all_dfs, ignore_index=True)
    master_df.columns = ["timestamp", "O", "H", "L", "C", "V", "T"]

    # Epoch milliseconds -> pandas datetime.
    master_df["timestamp"] = master_df["timestamp"].astype('int64')
    master_df["timestamp"] = pd.to_datetime(master_df["timestamp"], unit='ms')

    # Order oldest -> newest. Reversing first and sorting stably gives the same
    # result as a plain reversal for newest-first pages, but stays correct if
    # the rows arrive in any other order.
    master_df = (
        master_df.iloc[::-1]
        .sort_values('timestamp', kind='mergesort')
        .reset_index(drop=True)
    )

    # Price/volume columns arrive as strings; convert them to numbers.
    for col in ["O", "H", "L", "C", "V", "T"]:
        master_df[col] = pd.to_numeric(master_df[col], errors='coerce')

    # Drop duplicated timestamps (just in case).
    before = len(master_df)
    master_df = master_df.drop_duplicates(subset=['timestamp']).reset_index(drop=True)
    after = len(master_df)
    if before != after:
        print(f"[Info] Dropped {before - after} duplicated bars.")

    return master_df

# 5. Main
def main():
    """Fetch the configured Bybit klines, validate them, and write them to CSV.

    Steps:
      1. Convert START_DATE / END_DATE to epoch milliseconds and download the bars.
      2. Abort with ValueError if any cell is null.
      3. Abort with ValueError if consecutive timestamps are not exactly
         INTERVAL_BYBIT minutes apart (up to five offending pairs are printed).
      4. Save the bars under SAVE_DIR, creating the directory when needed.

    Returns early (after printing an error) when no data was fetched.
    """
    import os

    def _compact_date(text):
        # 'YYYY/MM/DD/HH/MM' -> 'YYYYMMDD' for use in the output file name
        return datetime.strptime(text, "%Y/%m/%d/%H/%M").strftime("%Y%m%d")

    range_start = to_timestamp(START_DATE)
    range_end = to_timestamp(END_DATE)

    print(f"\nFetching Bybit data only → {SYMBOL} ({INTERVAL_BYBIT}min) ...")
    bars = fetch_bybit_kline(SYMBOL, INTERVAL_BYBIT, range_start, range_end)
    if bars.empty:
        print("[Error] No Bybit data fetched.")
        return

    print("\n【品質チェック】")

    # --- missing values ---
    print("\n【欠損値チェック】")
    null_counts = bars.isnull().sum()
    print(null_counts)
    if null_counts.any():
        raise ValueError("欠損値検出: 前処理/補完が必要です")
    print("✅ 欠損値なし")

    # --- evenly spaced timestamps ---
    print("\n【タイムスタンプ間隔チェック】")
    ordered = bars.sort_values('timestamp').reset_index(drop=True)
    gaps = ordered['timestamp'].diff().dropna()
    irregular = gaps != pd.Timedelta(minutes=int(INTERVAL_BYBIT))
    print(f"ずれている箇所数: {irregular.sum()} 箇所")
    if irregular.any():
        print("❗ 等間隔でない例 (最大5件):")
        # Index labels of `gaps` are row positions in `ordered`, so pos - 1 is the previous bar.
        for pos in irregular[irregular].index[:5]:
            earlier = ordered.loc[pos - 1, 'timestamp']
            later = ordered.loc[pos, 'timestamp']
            print(f"ズレ: {earlier} → {later}（差分: {later - earlier}）")
        raise ValueError("タイムスタンプ間隔異常")
    print("✅ 等間隔で並んでいます")

    # --- save ---
    out_path = (
        f"{SAVE_DIR}/Market_Bybit_{SYMBOL}_{INTERVAL_BYBIT}min_"
        f"{_compact_date(START_DATE)}-{_compact_date(END_DATE)}.csv"
    )
    os.makedirs(SAVE_DIR, exist_ok=True)  # create the target folder if it does not exist
    bars.to_csv(out_path, index=False)
    print(f"[Success] Saved → {out_path}")


# Run the download/validation/save pipeline only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()



Fetching Bybit data only → HYPEUSDT (5min) ...
[Bybit Step 1] Oldest: 2025-07-26 08:45:00 Rows: 1000
[Bybit Step 2] Oldest: 2025-07-22 21:25:00 Rows: 1000
[Bybit Step 3] Oldest: 2025-07-19 10:05:00 Rows: 1000
[Bybit Step 4] Oldest: 2025-07-15 22:45:00 Rows: 1000
[Bybit Step 5] Oldest: 2025-07-12 11:25:00 Rows: 1000
[Bybit Step 6] Oldest: 2025-07-09 00:05:00 Rows: 1000
[Bybit Step 7] Oldest: 2025-07-05 12:45:00 Rows: 1000
[Bybit Step 8] Oldest: 2025-07-02 01:25:00 Rows: 1000
[Bybit Step 9] Oldest: 2025-06-28 14:05:00 Rows: 1000
[Bybit Step 10] Oldest: 2025-06-25 02:45:00 Rows: 1000
[Bybit Step 11] Oldest: 2025-06-21 15:25:00 Rows: 1000
[Bybit Step 12] Oldest: 2025-06-18 04:05:00 Rows: 1000
[Bybit Step 13] Oldest: 2025-06-14 16:45:00 Rows: 1000
[Bybit Step 14] Oldest: 2025-06-11 05:25:00 Rows: 1000
[Bybit Step 15] Oldest: 2025-06-07 18:05:00 Rows: 1000
[Bybit Step 16] Oldest: 2025-06-04 06:45:00 Rows: 1000
[Bybit Step 17] Oldest: 2025-05-31 19:25:00 Rows: 1000
[Bybit Step 18] Oldest: 20