In [None]:
import pandas as pd
import os

def clean_futures_csv(file_path: str):
    """Clean one futures CSV file and overwrite it with the cleaned rows.

    The cleaning steps run in this order:

    1. Drop duplicate rows (one row per ``symbol``/``date`` pair when both
       columns exist, otherwise only fully identical rows).
    2. Drop every row that contains a missing value in any column.
    3. Drop rows whose ``open``/``high``/``low``/``close`` is not > 0.
    4. Drop rows whose ``volume``/``open_interest`` is negative.
    5. Sort by ``date`` then ``symbol`` (only when both columns exist).
    6. Write the result back to ``file_path`` as UTF-8 with BOM.

    Steps 3-5 silently skip columns that are not present in the file.

    Args:
        file_path: Path of the CSV file to clean. The file is read with
            ``parse_dates=["date"]``, so a file that cannot be parsed that
            way (including one without a ``date`` column) is reported as a
            read failure and left untouched.

    Returns:
        None. Progress and problems are reported via ``print``.
    """
    print(f"\n📂 正在处理文件：{file_path}")

    try:
        df = pd.read_csv(file_path, parse_dates=["date"])
    except Exception as e:
        # Best-effort: report the problem and let the caller move on to the
        # next file instead of aborting a whole batch.
        print(f"❌ 读取失败：{e}")
        return

    print(f"✅ 原始记录数：{len(df)}")

    # 1. De-duplicate. Keyed de-duplication needs both key columns; without
    #    them, fall back to removing only rows that are identical in every
    #    column so no distinct record is thrown away (and no KeyError is
    #    raised for files that lack a key column).
    has_keys = "symbol" in df.columns and "date" in df.columns
    if has_keys:
        df = df.drop_duplicates(subset=["symbol", "date"])
    else:
        print("⚠️ 缺少 symbol/date 列，仅删除完全重复的行")
        df = df.drop_duplicates()

    # 2. Missing values: report the per-column counts, then drop the rows.
    missing = df.isnull().sum()
    if missing.sum() > 0:
        print("⚠️ 存在缺失值：")
        print(missing[missing > 0])
        df = df.dropna()
        print(f"✅ 已删除缺失值行，剩余 {len(df)} 条")

    # 3. Prices must be strictly positive.
    for col in ("open", "high", "low", "close"):
        if col in df.columns:
            df = df[df[col] > 0]

    # 4. Volume and open interest must be non-negative.
    for col in ("volume", "open_interest"):
        if col in df.columns:
            df = df[df[col] >= 0]

    # 5. Sort chronologically, then by symbol.
    if has_keys:
        df = df.sort_values(by=["date", "symbol"])

    # 6. Overwrite the source file. Write to a sibling temp file first and
    #    swap it in with os.replace, so a failure while writing cannot leave
    #    the only copy of the data truncated.
    tmp_path = f"{file_path}.tmp"
    try:
        df.to_csv(tmp_path, index=False, encoding="utf-8-sig")
        os.replace(tmp_path, file_path)
    finally:
        # Only still present if writing or replacing failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    print(f"✅ 清洗完成，最终记录数：{len(df)}")

def batch_clean_all_csv(folder_path="F:/国债期货数据/"):
    """Run ``clean_futures_csv`` on every ``.csv`` file directly in a folder.

    Only the folder's immediate entries are considered (no recursion), and
    an entry is selected when its name ends with the lowercase suffix
    ``.csv``. Errors raised while listing the folder propagate to the caller.

    Args:
        folder_path: Directory to scan for CSV files.

    Returns:
        None. A notice is printed when the folder holds no ``.csv`` entries.
    """
    print(f"\n🔎 扫描目录：{folder_path}")

    # Resolve the full path of each matching entry up front.
    targets = [
        os.path.join(folder_path, entry)
        for entry in os.listdir(folder_path)
        if entry.endswith(".csv")
    ]

    if targets:
        for csv_path in targets:
            clean_futures_csv(csv_path)
    else:
        print("⚠️ 未发现任何 .csv 文件")

if __name__ == "__main__":
    # Script entry point: clean every CSV in the treasury-futures data folder.
    data_dir = "F:/国债期货数据/"
    print("🚿 启动国债期货数据批量清洗程序")
    batch_clean_all_csv(data_dir)
    print("\n✅ 所有 .csv 文件清洗完成")
