# 保存币安合约历史数据

本文档用于下载和保存币安合约的历史K线数据。

In [19]:
import ccxt
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import time
import os
import json
from tqdm import tqdm

In [20]:
# Download settings shared by every cell below.
params = {
    # What to download
    'begin_date': '2025-02-05',
    'end_date': '2025-02-08',
    'time_intervals': ['1m'],  # options: ['1m', '5m', '15m', '1h', '4h', '1d']
    'use_all_usdt_pairs': True,  # True: take every USDT pair the exchange lists
    'specific_symbols': ['KAS/USDT:USDT'],  # only used when use_all_usdt_pairs is False
    'base_path': r'\\znas\Main\futures',  # root directory the CSV files are written under

    # Proxy endpoint
    'proxy': {
        'host': '127.0.0.1',
        'port': 1913
    },

    # Base options handed to the exchange client
    'exchange_config': {
        'timeout': 5000,
        'enableRateLimit': False,
        'options': {
            'defaultType': 'future'  # futures (contract) mode
        }
    }
}

# Make sure the root directory exists before anything tries to write into it.
os.makedirs(params['base_path'], exist_ok=True)

# One 'YYYY-MM-DD' string per calendar day from begin_date to end_date, inclusive.
start_date, end_date = (
    datetime.strptime(params[key], '%Y-%m-%d') for key in ('begin_date', 'end_date')
)
date_list = [
    day.strftime('%Y-%m-%d')
    for day in pd.date_range(start=start_date, end=end_date, freq='D')
]

In [21]:
def scan_existing_files(base_path):
    """Collect the names of all CSV files found anywhere under ``base_path``.

    The directory tree is walked recursively. Only the bare file name is
    kept (no directory part), so files with the same name in different
    folders collapse into a single entry.

    Args:
        base_path: Root directory to walk.

    Returns:
        A set of file names ending in ``.csv``. The number of names found
        is also printed.
    """
    csv_names = {
        name
        for _dirpath, _dirnames, names in os.walk(base_path)
        for name in names
        if name.endswith('.csv')
    }
    print(f"已扫描到 {len(csv_names)} 个现有文件")
    return csv_names

def analyze_download_status(target_symbols, existing_files, date_list, time_intervals):
    """Report, per symbol, how many of the expected daily CSV files exist.

    For every (symbol, date, interval) combination the expected file name is
    rebuilt and looked up in ``existing_files``. A per-symbol completion
    summary is printed, followed by the number of incomplete symbols.

    Args:
        target_symbols: Symbols to check, e.g. ``'BTC/USDT:USDT'``.
        existing_files: Collection of bare CSV file names already on disk
            (membership is tested with ``in``).
        date_list: Dates to check; each is normalised with
            ``pd.to_datetime(...).date()``.
        time_intervals: Timeframe labels such as ``'1m'``.

    Returns:
        The symbols, in input order, for which at least one expected file
        is missing.
    """
    # Normalise every date once instead of once per symbol and interval.
    date_labels = [str(pd.to_datetime(day).date()) for day in date_list]
    expected_per_symbol = len(date_labels) * len(time_intervals)

    stats = {}
    for symbol in target_symbols:
        # Must match the name fetch_and_save_data writes: '/' is dropped and
        # ':' becomes '_' (e.g. 'BTC/USDT:USDT' -> 'BTCUSDT_USDT'). Using
        # '_' for '/' here would never match a saved file.
        symbol_tag = symbol.replace('/', '').replace(':', '_')
        downloaded = 0
        missing_dates = []
        for date_label in date_labels:
            for time_interval in time_intervals:
                file_name = f"{date_label}_{symbol_tag}_{time_interval}.csv"
                if file_name in existing_files:
                    downloaded += 1
                else:
                    missing_dates.append(f"{date_label}_{time_interval}")
        stats[symbol] = {
            'total_expected': expected_per_symbol,
            'downloaded': downloaded,
            'missing_dates': missing_dates
        }

    print("\n下载统计信息:")
    incomplete_symbols = []
    for symbol, data in stats.items():
        total = data['total_expected']
        # Nothing expected (empty date or interval list) counts as complete
        # rather than dividing by zero.
        completion_rate = (data['downloaded'] / total) * 100 if total else 100.0
        print(f"{symbol}: 完成率 {completion_rate:.2f}% ({data['downloaded']}/{total})")
        if data['downloaded'] < total:
            incomplete_symbols.append(symbol)
            # List the gaps only when the list is short enough to read.
            if len(data['missing_dates']) <= 10:
                print(f"  缺失数据: {data['missing_dates']}")
            else:
                print(f"  缺失数据过多，共{len(data['missing_dates'])}个日期")

    print(f"\n未完全下载的交易对数量: {len(incomplete_symbols)}/{len(target_symbols)}")
    return incomplete_symbols

In [22]:
def init_exchange():
    """Create the ccxt Binance client configured from ``params``.

    The client options are a copy of ``params['exchange_config']`` plus a
    ``proxies`` entry that routes both http and https traffic through
    ``http://<proxy host>:<proxy port>``.

    Returns:
        The object returned by ``ccxt.binance(...)``.
    """
    settings = dict(params['exchange_config'])
    proxy = params['proxy']
    proxy_url = f"http://{proxy['host']}:{proxy['port']}"
    settings['proxies'] = {'http': proxy_url, 'https': proxy_url}
    return ccxt.binance(settings)


# Shared client used by the download helpers below.
exchange = init_exchange()

In [23]:
def get_available_symbols():
    """Return the list of symbols to download.

    When ``params['use_all_usdt_pairs']`` is falsy, the hand-picked
    ``params['specific_symbols']`` list is returned unchanged. Otherwise the
    exchange's markets are loaded and every market symbol containing
    ``':USDT'`` is kept.
    """
    if not params['use_all_usdt_pairs']:
        return params['specific_symbols']

    # NOTE(review): the substring test presumably selects USDT-settled
    # contracts in ccxt's unified symbol format — confirm it does not also
    # pick up dated or option symbols you do not want.
    return [name for name in exchange.load_markets() if ':USDT' in name]

def fetch_and_save_data(symbol, timeframe, start_time):
    """Download one day of candles for one symbol/timeframe and save it as CSV.

    The file is written to
    ``<base_path>/<date>/<date>_<symbol tag>_<timeframe>.csv`` where the
    symbol tag is the symbol with '/' removed and ':' replaced by '_'.

    Args:
        symbol: Market symbol passed to ``exchange.fetch_ohlcv``.
        timeframe: Candle interval label such as ``'1m'``.
        start_time: The day to download; parsed with ``pd.to_datetime``.

    Returns:
        ``(True, None)`` when the target file already exists,
        ``(True, DataFrame)`` when data was downloaded and saved, and
        ``(False, None)`` when there was no data for the day or any
        exception occurred (the exception is printed, not raised).
    """
    try:
        # Resolve the per-day folder and the target file.
        day = pd.to_datetime(start_time).date()
        day_label = str(day)
        day_dir = os.path.join(params['base_path'], day_label)
        os.makedirs(day_dir, exist_ok=True)

        symbol_tag = symbol.replace('/', '').replace(':', '_')
        csv_name = f"{day_label}_{symbol_tag}_{timeframe}.csv"
        csv_path = os.path.join(day_dir, csv_name)

        # Already downloaded: nothing to do.
        if os.path.exists(csv_path):
            return True, None

        print(f'正在获取 {exchange.id} {symbol} {timeframe} {start_time} 的数据')

        # Day window in epoch milliseconds.
        # NOTE(review): built from a timezone-naive timestamp — confirm it
        # lines up with the exchange's candle timestamps.
        window_start_ms = int(pd.Timestamp(f'{start_time} 00:00:00').timestamp() * 1000)
        window_end_ms = int(pd.Timestamp(f'{start_time} 23:59:59').timestamp() * 1000)

        # Page through the day, 1000 candles per request, resuming one
        # millisecond after the last candle received.
        candles = []
        cursor_ms = window_start_ms
        while cursor_ms < window_end_ms:
            batch = exchange.fetch_ohlcv(
                symbol=symbol,
                timeframe=timeframe,
                since=cursor_ms,
                limit=1000
            )
            if not batch:
                break
            candles.extend(batch)
            cursor_ms = batch[-1][0] + 1
            time.sleep(0.8)  # pause between requests

        if not candles:
            print(f"{symbol} 在 {start_time} 无数据")
            return False, None

        frame = pd.DataFrame(
            candles,
            columns=['datetime', 'open', 'high', 'low', 'close', 'volume']
        )
        frame['datetime'] = pd.to_datetime(frame['datetime'], unit='ms')

        # The last page can run past midnight; keep only the requested day.
        frame = frame[frame['datetime'].dt.date == day]

        # De-duplicate on timestamp (last occurrence wins) and order by time.
        frame = frame.drop_duplicates(subset=['datetime'], keep='last')
        frame = frame.sort_values('datetime').reset_index(drop=True)

        if frame.empty:
            print(f"{symbol} 在 {start_time} 筛选后无数据")
            return False, None

        frame.to_csv(csv_path, index=False)
        return True, frame

    except Exception as e:
        print(f'获取数据失败: {symbol}_{timeframe}_{start_time}, 错误: {e}')
        return False, None

In [24]:
# Snapshot of the CSV files already on disk.
existing_files = scan_existing_files(params['base_path'])

# Symbols to process.
target_symbols = get_available_symbols()
print(f"将处理以下交易对: {target_symbols}")

error_list = []        # one '<exchange>_<symbol>_<interval>_<date>' entry per failed fetch
empty_data_count = {}  # symbol -> number of consecutive days that yielded no data

# Work from the newest date backwards. sorted() is used instead of
# list.reverse() so that re-running this cell does not flip the order back.
date_list = sorted(date_list, reverse=True)


def download_symbols(symbols, dates, max_empty_days=3):
    """Download every (symbol, date, interval) combination.

    Failed fetches are appended to the module-level ``error_list``. A day
    counts as empty when none of the configured intervals could be fetched
    for it; after ``max_empty_days`` consecutive empty days the rest of the
    dates for that symbol are skipped.

    Args:
        symbols: Symbols to download.
        dates: Dates to walk, in the order they should be tried.
        max_empty_days: Consecutive empty days tolerated before a symbol
            is abandoned.
    """
    for symbol in symbols:
        empty_data_count[symbol] = 0

        for start_time in dates:
            day_has_data = False
            for time_interval in params['time_intervals']:
                success, _ = fetch_and_save_data(symbol, time_interval, start_time)
                if success:
                    day_has_data = True
                else:
                    error_list.append(f'{exchange.id}_{symbol}_{time_interval}_{start_time}')

            if day_has_data:
                empty_data_count[symbol] = 0
                continue

            # Count whole days, not individual intervals, so the threshold
            # means the same thing however many intervals are configured.
            empty_data_count[symbol] += 1
            if empty_data_count[symbol] >= max_empty_days:
                print(f'{symbol} 连续 {empty_data_count[symbol]} 天无数据，跳转到下一个交易对')
                break


# First pass over every requested symbol.
download_symbols(target_symbols, date_list)

# Compare what is on disk now with what was requested.
print("\n开始分析下载情况...")
existing_files = scan_existing_files(params['base_path'])  # rescan after the download
incomplete_symbols = analyze_download_status(
    target_symbols,
    existing_files,
    date_list,
    params['time_intervals']
)

# Offer a second pass restricted to the symbols that still have gaps.
if incomplete_symbols:
    print("\n是否要重新下载未完成的交易对？(y/n)")
    if input().strip().lower() == 'y':
        target_symbols = incomplete_symbols
        print("\n开始重新下载未完成的交易对...")
        # Files that already exist are skipped by fetch_and_save_data, so
        # only the gaps are requested again. Fetches that fail again are
        # appended to error_list a second time.
        download_symbols(target_symbols, date_list)

        existing_files = scan_existing_files(params['base_path'])
        incomplete_symbols = analyze_download_status(
            target_symbols,
            existing_files,
            date_list,
            params['time_intervals']
        )

已扫描到 111251 个现有文件
将处理以下交易对: ['BTC/USDT:USDT', 'ETH/USDT:USDT', 'BCH/USDT:USDT', 'XRP/USDT:USDT', 'EOS/USDT:USDT', 'LTC/USDT:USDT', 'TRX/USDT:USDT', 'ETC/USDT:USDT', 'LINK/USDT:USDT', 'XLM/USDT:USDT', 'ADA/USDT:USDT', 'XMR/USDT:USDT', 'DASH/USDT:USDT', 'ZEC/USDT:USDT', 'XTZ/USDT:USDT', 'BNB/USDT:USDT', 'ATOM/USDT:USDT', 'ONT/USDT:USDT', 'IOTA/USDT:USDT', 'BAT/USDT:USDT', 'VET/USDT:USDT', 'NEO/USDT:USDT', 'QTUM/USDT:USDT', 'IOST/USDT:USDT', 'THETA/USDT:USDT', 'ALGO/USDT:USDT', 'ZIL/USDT:USDT', 'KNC/USDT:USDT', 'ZRX/USDT:USDT', 'COMP/USDT:USDT', 'OMG/USDT:USDT', 'DOGE/USDT:USDT', 'SXP/USDT:USDT', 'KAVA/USDT:USDT', 'BAND/USDT:USDT', 'RLC/USDT:USDT', 'WAVES/USDT:USDT', 'MKR/USDT:USDT', 'SNX/USDT:USDT', 'DOT/USDT:USDT', 'DEFI/USDT:USDT', 'YFI/USDT:USDT', 'BAL/USDT:USDT', 'CRV/USDT:USDT', 'TRB/USDT:USDT', 'RUNE/USDT:USDT', 'SUSHI/USDT:USDT', 'EGLD/USDT:USDT', 'SOL/USDT:USDT', 'ICX/USDT:USDT', 'STORJ/USDT:USDT', 'BLZ/USDT:USDT', 'UNI/USDT:USDT', 'AVAX/USDT:USDT', 'FTM/USDT:USDT', 'ENJ/USDT:USD