# funcs

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
# df显示所有列
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', '{:.8f}'.format)
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
import os
from dcdl_amps_csv import *
def analyze_slippage_backtest(file_path):
    """
    Analyze the slippage distribution of a JSON-lines trading log.

    Every non-blank line of the file is parsed as one JSON value. Only JSON
    objects whose ``slippage`` entry is present and not null are used. Overall
    statistics and per-trade-type statistics are printed, and the histograms
    are drawn by ``create_visualizations``.

    Args:
        file_path: Path to the JSON-lines file.

    Returns:
        dict with the overall statistics (``count``, ``mean``, ``median``,
        ``std``, ``min``, ``max`` and ``percentile_<q>`` for q in 1, 5, 10,
        25, 50, 55, 60, 65, 75, 95, 99), or ``None`` when the file does not
        exist or contains no usable slippage value.
    """
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        return None

    slippage_values = []
    slippage_by_type = defaultdict(list)
    slippage_by_volume = defaultdict(list)

    with open(file_path, 'r', encoding='utf-8') as f:
        for line_number, line in enumerate(f, start=1):
            # Blank lines (for instance a trailing newline) are not records.
            if not line.strip():
                continue

            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                print(f"Error parsing JSON at line {line_number}")
                continue

            # A line can be valid JSON without being an object (a bare number,
            # string or list); such a line carries no slippage record.
            if not isinstance(data, dict):
                continue

            slippage = data.get('slippage')
            if slippage is None:
                continue

            slippage_values.append(slippage)
            # A record lacking 'type' or 'volume' still counts towards the
            # overall statistics instead of aborting the run with a KeyError.
            slippage_by_type[data.get('type', 'unknown')].append(slippage)
            slippage_by_volume[data.get('volume')].append(slippage)

    if not slippage_values:
        print("No valid slippage values found.")
        return None

    slippage_array = np.array(slippage_values)
    percentile_points = (1, 5, 10, 25, 50, 55, 60, 65, 75, 95, 99)

    stats = {
        'count': len(slippage_array),
        'mean': np.mean(slippage_array),
        'median': np.median(slippage_array),
        'std': np.std(slippage_array),
        'min': np.min(slippage_array),
        'max': np.max(slippage_array),
    }
    for q in percentile_points:
        stats[f'percentile_{q}'] = np.percentile(slippage_array, q)

    # Overall report.
    print("Slippage Statistics:")
    print(f"Count: {stats['count']}")
    for label, key in (
        ("Mean", 'mean'),
        ("Median", 'median'),
        ("Standard Deviation", 'std'),
        ("Min", 'min'),
        ("Max", 'max'),
    ):
        print(f"{label}: {stats[key]:.8f}")
    for q in percentile_points:
        ordinal = "st" if q == 1 else "th"
        print(f"{q}{ordinal} Percentile: {stats[f'percentile_{q}']:.8f}")

    # Per-trade-type report.
    print("\nSlippage by Trade Type:")
    for trade_type, values in slippage_by_type.items():
        values_array = np.array(values)
        print(f"{trade_type}:")
        print(f"  Count: {len(values_array)}")
        print(f"  Mean: {np.mean(values_array):.8f}")
        print(f"  Median: {np.median(values_array):.8f}")
        print(f"  Standard Deviation: {np.std(values_array):.8f}")

    create_visualizations(slippage_array, slippage_by_type, slippage_by_volume)

    return stats

def create_visualizations(slippage_array, slippage_by_type, slippage_by_volume,
                          save_path='slippage_hist_row.png'):
    """
    Draw three slippage histograms in one row, save the figure and show it.

    Panels, left to right: all slippage values, values of trade type
    ``Maker_ask`` and values of trade type ``Maker_bid``. A per-type panel
    keeps its title and axes but stays empty when that type has no values.

    Args:
        slippage_array: All slippage values.
        slippage_by_type: Mapping from trade type to a list of slippage values.
        slippage_by_volume: Accepted for call compatibility; not used here.
        save_path: File the figure is written to. Defaults to the previously
            hard-coded ``'slippage_hist_row.png'``; pass ``None`` to skip
            saving.
    """
    import matplotlib.pyplot as plt

    # (values, title, colour, draw even when empty). The original label table
    # annotated Maker_ask as "主动买单/挂卖" and Maker_bid as "主动卖单/挂买",
    # but those labels were never displayed, so the shown titles are kept.
    panels = (
        (slippage_array, "All Slippage Distribution", "blue", True),
        (slippage_by_type.get("Maker_ask"), 'Ask Slippage Distribution', "green", False),
        (slippage_by_type.get("Maker_bid"), 'Bid Slippage Distribution', "red", False),
    )

    fig, axes = plt.subplots(1, 3, figsize=(15, 4))

    for ax, (values, title, color, always_draw) in zip(axes, panels):
        if always_draw or (values is not None and len(values) > 0):
            ax.hist(values, bins=50, alpha=0.7, color=color)
        ax.set_title(title)
        ax.set_xlabel('Slippage')
        ax.set_ylabel('Frequency')
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    if save_path is not None:
        fig.savefig(save_path)
    plt.show()
def analyze_slippage(file_path, file_path2=None, file_path3=None, starttime=None, endtime=None):
    """
    Compute signed slippage for orders read from up to three CSV files, plot
    the sell / buy / overall distributions and print summary statistics.

    Per row, slippage is ``(Price / Order2FilledPrice - 1 - ESR) * sign`` with
    ``sign = 1`` when ``Side == 'sell'`` and ``-1`` otherwise. Rows with
    ``Order2FilledPrice == 0`` are dropped, rows are de-duplicated on
    ``OrderID`` and rows whose slippage is NaN are discarded.

    Parameters:
        file_path (str): Path of the first CSV file (required).
        file_path2 (str, optional): Second CSV file, appended to the first.
        file_path3 (str, optional): Third CSV file, appended as well.
        starttime (str or pd.Timestamp, optional): Keep rows whose ``Timestamp``
            is strictly later than this value (its microseconds are zeroed),
            e.g. '2025-07-08 00:00:00'.
        endtime (str or pd.Timestamp, optional): Keep rows whose ``Timestamp``
            is strictly earlier than this value (its microseconds are zeroed),
            e.g. '2025-07-08 23:59:59'.

    Returns:
        tuple: ``(stats, sell_percentiles, buy_percentiles, df)`` where the
        first three are dicts with ``count``, ``mean``, ``median``, ``std``
        (population, ddof=0), ``min``, ``max`` and ``percentile_<q>`` entries
        for the overall / sell / buy slippage, and ``df`` is the processed
        DataFrame. When no row survives the filters, every statistic except
        ``count`` is NaN and nothing is plotted.
    """
    quantiles = (1, 5, 10, 25, 50, 55, 60, 65, 75, 95, 99)

    def _summarize(values):
        """Summary statistics of *values*; NaN entries when it is empty."""
        arr = np.asarray(values, dtype=float)
        summary = {'count': len(arr)}
        if len(arr) == 0:
            for key in ('mean', 'median', 'std', 'min', 'max'):
                summary[key] = np.nan
            for q in quantiles:
                summary[f'percentile_{q}'] = np.nan
            return summary
        summary['mean'] = np.mean(arr)
        summary['median'] = np.median(arr)
        summary['std'] = np.std(arr)
        summary['min'] = np.min(arr)
        summary['max'] = np.max(arr)
        for q in quantiles:
            summary[f'percentile_{q}'] = np.percentile(arr, q)
        return summary

    def _print_summary(title, summary):
        """Print one statistics dict, one indented ``key: value`` per line."""
        print(title)
        for key, value in summary.items():
            print(f"  {key}: {value:.8f}")

    def _plot(ax, series, title, color):
        """Histogram of *series* on *ax* with a dashed line at its mean."""
        series.hist(ax=ax, bins=30, alpha=0.7, color=color)
        ax.set_title(title)
        ax.set_xlabel('Slippage')
        ax.set_ylabel('Frequency')
        mean = series.mean()
        ax.axvline(mean, color=color, linestyle='--', label=f'Mean: {mean:.6f}')
        ax.legend()

    # Load the first file and append the optional ones.
    frames = [pd.read_csv(file_path)]
    frames.extend(pd.read_csv(extra) for extra in (file_path2, file_path3) if extra is not None)
    df = frames[0] if len(frames) == 1 else pd.concat(frames, ignore_index=True)

    # Copy after filtering so the column assignments below act on an
    # independent frame rather than on a slice of the loaded data.
    df = df[df['Order2FilledPrice'] != 0].copy()

    # Signed slippage.
    df['SR'] = df['Price'] / df['Order2FilledPrice'] - 1
    df['slippage'] = df['SR'] - df['ESR']
    df['sign'] = np.where(df['Side'] == 'sell', 1, -1)
    df['slippage'] = df['slippage'] * df['sign']

    # Parse each timestamp column once and reuse it below.
    order_time = pd.to_datetime(df['Timestamp'])
    hedge_time = pd.to_datetime(df['Order2Timestamp'])
    df['TimeUsed'] = (hedge_time - order_time).dt.total_seconds()
    # NOTE(review): HedgingTimeUsed has always been computed from the same two
    # columns as TimeUsed; confirm whether a different pair was intended.
    df['HedgingTimeUsed'] = df['TimeUsed']
    df['Amount'] = df['AmountFilled'] * df['AveragePrice']

    # NOTE(review): other cells of this notebook compare OrderID with float
    # literals; if the column is parsed as float64, distinct 19-digit IDs could
    # collapse to one value and be dropped here - confirm the column dtype.
    df = df.drop_duplicates(subset=['OrderID'])

    # Optional time window; both bounds are exclusive.
    if starttime is not None or endtime is not None:
        row_time = order_time.loc[df.index]
        keep = pd.Series(True, index=df.index)
        if starttime is not None:
            start = pd.to_datetime(starttime).replace(microsecond=0)
            keep &= row_time > start
        if endtime is not None:
            end = pd.to_datetime(endtime).replace(microsecond=0)
            keep &= row_time < end
        df = df[keep]

    df = df[df['slippage'].notna()].copy()

    # Nothing left: report NaN statistics instead of failing inside numpy.
    if df.empty:
        print("No valid slippage rows to analyze.")
        empty = _summarize([])
        return empty, dict(empty), dict(empty), df

    stats = _summarize(df['slippage'])

    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(30, 8))

    # Sell side.
    sell_slippage = df[df['Side'] == 'sell']['slippage']
    _plot(ax1, sell_slippage, 'Sell Slippage Distribution', 'red')
    sell_percentiles = _summarize(sell_slippage)
    _print_summary("Sell slippage percentiles:", sell_percentiles)

    # Buy side.
    buy_slippage = df[df['Side'] == 'buy']['slippage']
    _plot(ax2, buy_slippage, 'Buy Slippage Distribution', 'blue')
    buy_percentiles = _summarize(buy_slippage)
    _print_summary("Buy slippage percentiles:", buy_percentiles)

    # Overall.
    _plot(ax3, df['slippage'], 'Overall Slippage Distribution', 'green')

    plt.tight_layout()
    plt.show()

    _print_summary("Overall slippage percentiles:", stats)
    return stats, sell_percentiles, buy_percentiles, df


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def analyze_slippage_compare(
    file_path_a: str,
    file_path_b: str,
    starttime=None,
    endtime=None,
    label_a: str = "FileA",
    label_b: str = "FileB",
    bins: int = 50,
    xlim_quantile=(0.01, 0.99),
    show: bool = True,
):
    """
    Compare the slippage of two order CSV files.

    Builds a statistics table and a 2x3 grid of histograms (one row per file;
    columns Sell / Buy / Overall). Slippage uses the same definition as
    ``analyze_slippage``: ``(Price / Order2FilledPrice - 1 - ESR) * sign`` with
    ``sign = 1`` for sells and ``-1`` otherwise.

    NOTE: a second function with this same name (taking a list of paths) is
    defined further down in this file and replaces this one once both
    definitions have been executed.

    Args:
        file_path_a: Path of the first CSV file.
        file_path_b: Path of the second CSV file.
        starttime: Optional exclusive lower bound on ``Timestamp`` (floored to
            the second).
        endtime: Optional exclusive upper bound on ``Timestamp`` (floored to
            the second).
        label_a: Column / row label used for the first file.
        label_b: Column / row label used for the second file.
        bins: Number of histogram bins.
        xlim_quantile: ``(low, high)`` quantiles of the combined slippage used
            as the shared x-range, or ``None`` to use min / max.
        show: Call ``plt.show()`` when True.

    Returns:
        compare_table (pd.DataFrame): rows indexed by ``(side, metric)``,
            columns ``label_a``, ``label_b`` and ``diff`` (B - A).
        fig (matplotlib.figure.Figure)
        df_a (pd.DataFrame): processed data of file A.
        df_b (pd.DataFrame): processed data of file B.

    Raises:
        ValueError: if the two labels are equal or a file lacks a required
            column.
    """
    if label_a == label_b:
        # Equal labels would yield duplicate table columns and a broken diff.
        raise ValueError("label_a and label_b must be different")

    def _to_ts(x):
        """Convert *x* to a pd.Timestamp, passing None through."""
        if x is None:
            return None
        return x if isinstance(x, pd.Timestamp) else pd.to_datetime(x)

    def _load_and_process(path: str) -> pd.DataFrame:
        """Read one CSV and return the filtered frame with a slippage column."""
        df = pd.read_csv(path)
        required_cols = [
            "Order2FilledPrice", "Price", "ESR", "Side",
            "Order2Timestamp", "Timestamp",
            "AmountFilled", "AveragePrice", "OrderID"
        ]
        missing = [c for c in required_cols if c not in df.columns]
        if missing:
            raise ValueError(f"[{path}] 缺少必要列: {missing}")

        df = df[df["Order2FilledPrice"] != 0].copy()

        # Signed slippage.
        df["SR"] = df["Price"] / df["Order2FilledPrice"] - 1
        df["slippage"] = df["SR"] - df["ESR"]
        df["sign"] = df["Side"].apply(lambda x: 1 if x == "sell" else -1)
        df["slippage"] = df["slippage"] * df["sign"]

        # Time between the order timestamp and the second order's timestamp.
        t2 = pd.to_datetime(df["Order2Timestamp"])
        t1 = pd.to_datetime(df["Timestamp"])
        df["TimeUsed"] = (t2 - t1).dt.total_seconds()
        df["HedgingTimeUsed"] = df["TimeUsed"]

        df["Amount"] = df["AmountFilled"] * df["AveragePrice"]

        df = df.drop_duplicates(subset=["OrderID"]).copy()

        # Time window, compared at one-second resolution. Both bounds are
        # combined into a single mask aligned with the current index, so the
        # frame is never indexed with a mask built for an earlier shape.
        st = _to_ts(starttime)
        et = _to_ts(endtime)
        if st is not None or et is not None:
            # Lower-case "s": the upper-case "S" alias is deprecated since
            # pandas 2.2.
            ts = t1.loc[df.index].dt.floor("s")
            keep = pd.Series(True, index=df.index)
            if st is not None:
                keep &= ts > st.floor("s")
            if et is not None:
                keep &= ts < et.floor("s")
            df = df[keep]

        df = df[df["slippage"].notna()].copy()
        return df

    def _summarize(series: pd.Series, percentiles=(1, 5, 10, 25, 50, 75, 90, 95, 99)) -> dict:
        """Count, moments, extremes and percentiles of *series* (NaN if empty)."""
        arr = series.dropna().to_numpy()
        if len(arr) == 0:
            out = {"count": 0, "mean": np.nan, "std": np.nan, "median": np.nan, "min": np.nan, "max": np.nan}
            for p in percentiles:
                out[f"p{p}"] = np.nan
            return out

        out = {
            "count": int(len(arr)),
            "mean": float(np.mean(arr)),
            "std": float(np.std(arr)),
            "median": float(np.median(arr)),
            "min": float(np.min(arr)),
            "max": float(np.max(arr)),
        }
        for p in percentiles:
            out[f"p{p}"] = float(np.percentile(arr, p))
        return out

    df_a = _load_and_process(file_path_a)
    df_b = _load_and_process(file_path_b)

    # Selectors for the three views of a processed frame.
    sides = {
        "sell": lambda d: d[d["Side"] == "sell"]["slippage"],
        "buy":  lambda d: d[d["Side"] == "buy"]["slippage"],
        "overall": lambda d: d["slippage"],
    }

    rows = []
    for side_name, getter in sides.items():
        sa = _summarize(getter(df_a))
        sb = _summarize(getter(df_b))
        for k in sa:  # both summaries share the same keys
            rows.append((side_name, k, sa[k], sb[k]))

    summary_df = pd.DataFrame(rows, columns=["side", "metric", label_a, label_b]).set_index(["side", "metric"])
    summary_df["diff"] = summary_df[label_b] - summary_df[label_a]
    compare_table = summary_df[[label_a, label_b, "diff"]]

    def _q_range(s1, s2, ql, qh):
        """Shared x-range from the combined quantiles, or (None, None)."""
        x = pd.concat([s1.dropna(), s2.dropna()], ignore_index=True)
        if len(x) == 0:
            return None, None
        lo = x.quantile(ql) if ql is not None else x.min()
        hi = x.quantile(qh) if qh is not None else x.max()
        if np.isfinite(lo) and np.isfinite(hi) and lo < hi:
            return float(lo), float(hi)
        return None, None

    ql, qh = xlim_quantile if xlim_quantile is not None else (None, None)
    x_lo, x_hi = _q_range(df_a["slippage"], df_b["slippage"], ql, qh)

    fig, axes = plt.subplots(2, 3, figsize=(22, 10), sharex=True)
    col_defs = [("sell", "Sell"), ("buy", "Buy"), ("overall", "Overall")]
    row_defs = [(df_a, label_a), (df_b, label_b)]

    for r, (dff, lab) in enumerate(row_defs):
        for c, (side_key, side_title) in enumerate(col_defs):
            ax = axes[r, c]
            s = sides[side_key](dff).dropna()

            ax.hist(s, bins=bins, alpha=0.7)
            mu = s.mean() if len(s) else np.nan
            # ddof=0 so the title agrees with the "std" row of the table.
            sd = s.std(ddof=0) if len(s) else np.nan

            ax.set_title(f"{lab} - {side_title} (n={len(s)}, std={sd:.6g})")
            ax.set_xlabel("slippage")
            ax.set_ylabel("freq")
            if np.isfinite(mu):
                ax.axvline(mu, linestyle="--", label=f"mean={mu:.6g}")
                # A legend is only meaningful when the mean line exists.
                ax.legend()

            if x_lo is not None and x_hi is not None:
                ax.set_xlim(x_lo, x_hi)

    plt.tight_layout()
    if show:
        plt.show()

    return compare_table, fig, df_a, df_b


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def analyze_slippage_compare(
    file_paths,
    starttime=None,
    endtime=None,
    labels=None,
    bins: int = 50,
    xlim_quantile=(0.01, 0.99),
    show: bool = True,
):
    """
    Compare the slippage of several order CSV files.

    Builds a statistics table and an N x 3 grid of histograms (one row per
    file; columns Sell / Buy / Overall). Slippage is
    ``(Price / Order2FilledPrice - 1 - ESR) * sign`` with ``sign = 1`` for
    sells and ``-1`` otherwise.

    Args:
        file_paths: list or tuple of at least two CSV paths.
        starttime: Optional exclusive lower bound on ``Timestamp`` (floored to
            the second).
        endtime: Optional exclusive upper bound on ``Timestamp`` (floored to
            the second).
        labels: list or tuple of distinct labels, one per path. Defaults to
            ``File1``, ``File2``, ...
        bins: Number of histogram bins.
        xlim_quantile: ``(low, high)`` quantiles of the combined slippage used
            as the shared x-range, or ``None`` to use min / max.
        show: Call ``plt.show()`` when True.

    Returns:
        tuple: ``(compare_table, fig, *dfs)``. ``compare_table`` is indexed by
        ``(side, metric)`` with one column per label plus one
        ``diff(<label>-<first label>)`` column for every label after the
        first; ``dfs`` are the processed frames in input order.

    Raises:
        ValueError: on invalid ``file_paths`` / ``labels`` or when a file
            lacks a required column.
    """
    if not isinstance(file_paths, (list, tuple)) or len(file_paths) < 2:
        raise ValueError("file_paths 必须是 list/tuple 且长度>=2，例如 ['a.csv','b.csv'] 或 ['a','b','c']")
    n = len(file_paths)

    if labels is None:
        labels = [f"File{i+1}" for i in range(n)]
    if not isinstance(labels, (list, tuple)) or len(labels) != n:
        raise ValueError("labels 必须为 list/tuple 且长度与 file_paths 一致")
    if len(set(labels)) != n:
        # Repeated labels would yield duplicate table columns and broken diffs.
        raise ValueError("labels must be distinct")

    def _to_ts(x):
        """Convert *x* to a pd.Timestamp, passing None through."""
        if x is None:
            return None
        return x if isinstance(x, pd.Timestamp) else pd.to_datetime(x)

    def _load_and_process(path: str) -> pd.DataFrame:
        """Read one CSV and return the filtered frame with a slippage column."""
        df = pd.read_csv(path)
        required_cols = [
            "Order2FilledPrice", "Price", "ESR", "Side",
            "Order2Timestamp", "Timestamp",
            "AmountFilled", "AveragePrice", "OrderID"
        ]
        missing = [c for c in required_cols if c not in df.columns]
        if missing:
            raise ValueError(f"[{path}] 缺少必要列: {missing}")

        df = df[df["Order2FilledPrice"] != 0].copy()

        # Signed slippage.
        df["SR"] = df["Price"] / df["Order2FilledPrice"] - 1
        df["slippage"] = df["SR"] - df["ESR"]
        df["sign"] = df["Side"].apply(lambda x: 1 if x == "sell" else -1)
        df["slippage"] = df["slippage"] * df["sign"]

        # Time between the order timestamp and the second order's timestamp.
        t2 = pd.to_datetime(df["Order2Timestamp"])
        t1 = pd.to_datetime(df["Timestamp"])
        df["TimeUsed"] = (t2 - t1).dt.total_seconds()
        df["HedgingTimeUsed"] = df["TimeUsed"]

        df["Amount"] = df["AmountFilled"] * df["AveragePrice"]

        df = df.drop_duplicates(subset=["OrderID"]).copy()

        # Time window, compared at one-second resolution. Both bounds are
        # combined into a single mask aligned with the current index, so the
        # frame is never indexed with a mask built for an earlier shape.
        st = _to_ts(starttime)
        et = _to_ts(endtime)
        if st is not None or et is not None:
            # Lower-case "s": the upper-case "S" alias is deprecated since
            # pandas 2.2.
            ts = t1.loc[df.index].dt.floor("s")
            keep = pd.Series(True, index=df.index)
            if st is not None:
                keep &= ts > st.floor("s")
            if et is not None:
                keep &= ts < et.floor("s")
            df = df[keep]

        df = df[df["slippage"].notna()].copy()
        return df

    def _summarize(series: pd.Series, percentiles=(1, 5, 10, 25, 50, 75, 90, 95, 99)) -> dict:
        """Count, moments, extremes and percentiles of *series* (NaN if empty)."""
        arr = series.dropna().to_numpy()
        if len(arr) == 0:
            out = {"count": 0, "mean": np.nan, "std": np.nan, "median": np.nan, "min": np.nan, "max": np.nan}
            for p in percentiles:
                out[f"p{p}"] = np.nan
            return out

        out = {
            "count": int(len(arr)),
            "mean": float(np.mean(arr)),
            "std": float(np.std(arr)),
            "median": float(np.median(arr)),
            "min": float(np.min(arr)),
            "max": float(np.max(arr)),
        }
        for p in percentiles:
            out[f"p{p}"] = float(np.percentile(arr, p))
        return out

    dfs = [_load_and_process(p) for p in file_paths]

    # Selectors for the three views of a processed frame.
    sides = {
        "sell": lambda d: d[d["Side"] == "sell"]["slippage"],
        "buy":  lambda d: d[d["Side"] == "buy"]["slippage"],
        "overall": lambda d: d["slippage"],
    }

    rows = []
    for side_name, getter in sides.items():
        stats_list = [_summarize(getter(df)) for df in dfs]
        for k in stats_list[0]:  # every summary shares the same keys
            rows.append((side_name, k, *[st[k] for st in stats_list]))

    summary_df = pd.DataFrame(rows, columns=["side", "metric", *labels]).set_index(["side", "metric"])

    # One diff column per additional file, always relative to the first file.
    base = labels[0]
    for lab in labels[1:]:
        summary_df[f"diff({lab}-{base})"] = summary_df[lab] - summary_df[base]

    compare_table = summary_df

    def _q_range_many(series_list, ql, qh):
        """Shared x-range from the combined quantiles, or (None, None)."""
        x = pd.concat([s.dropna() for s in series_list], ignore_index=True)
        if len(x) == 0:
            return None, None
        lo = x.quantile(ql) if ql is not None else x.min()
        hi = x.quantile(qh) if qh is not None else x.max()
        if np.isfinite(lo) and np.isfinite(hi) and lo < hi:
            return float(lo), float(hi)
        return None, None

    ql, qh = xlim_quantile if xlim_quantile is not None else (None, None)
    x_lo, x_hi = _q_range_many([df["slippage"] for df in dfs], ql, qh)

    # squeeze=False keeps `axes` two-dimensional for every n, so no special
    # case is needed before indexing with [row, column].
    fig, axes = plt.subplots(n, 3, figsize=(22, 5 * n), sharex=True, squeeze=False)

    col_defs = [("sell", "Sell"), ("buy", "Buy"), ("overall", "Overall")]

    for r, (dff, lab) in enumerate(zip(dfs, labels)):
        for c, (side_key, side_title) in enumerate(col_defs):
            ax = axes[r, c]
            s = sides[side_key](dff).dropna()

            ax.hist(s, bins=bins, alpha=0.7)
            mu = s.mean() if len(s) else np.nan
            # ddof=0 so the title agrees with the "std" row of the table.
            sd = s.std(ddof=0) if len(s) else np.nan

            ax.set_title(f"{lab} - {side_title} (n={len(s)}, std={sd:.6g})")
            ax.set_xlabel("slippage")
            ax.set_ylabel("freq")
            if np.isfinite(mu):
                ax.axvline(mu, linestyle="--", label=f"mean={mu:.6g}")
                # A legend is only meaningful when the mean line exists.
                ax.legend()

            if x_lo is not None and x_hi is not None:
                ax.set_xlim(x_lo, x_hi)

    plt.tight_layout()
    if show:
        plt.show()

    return (compare_table, fig, *dfs)


In [None]:
# Show the rows with positive slippage, restricted to the timing / price columns listed below.
# Needs `df` to already contain the SR, slippage and HedgingTimeUsed columns
# (presumably the frame returned by analyze_slippage in another cell).
df[df['slippage']>0][['Createtime','Timestamp','Symbol','OrderID','AveragePrice','ESR','Order2FilledPrice','Order2CreateTime','Order2Timestamp','SR','slippage','HedgingTimeUsed']]

In [None]:
# Load the raw ETH order export and select the row(s) of one order ID.
# NOTE(review): the ID is matched as a float literal; float64 cannot represent
# every 19-digit integer, so neighbouring IDs may compare equal - confirm how
# the OrderID column is parsed.
df = pd.read_csv('/Users/rayxu/Downloads/order.arbitrage_eth_okx_binance_09_2 (9).csv')
df[df['OrderID']==2894534748356714496.00000000 ]

In [None]:
# Scratch arithmetic: relative change from 1.04 to 1.05, multiplied by 365.
(1.05/1.04-1)*365

In [None]:
# Scratch arithmetic: absolute difference between 1.05 and 1.04, multiplied by 365.
(1.05-1.04)*365

In [None]:
# 98th percentile of HedgingTimeUsed among the rows with positive slippage.
df[df['slippage']>0]['HedgingTimeUsed'].quantile(0.98)

In [None]:
# Histogram of the TimeUsed column with 100 bins.
df['TimeUsed'].hist(bins=100)

In [None]:
df

In [None]:

# Run the slippage analysis on the ETH export, keeping only the processed frame
# (the three statistics dicts are discarded), then compute the fraction of rows
# whose slippage is non-negative.
_,_,_,df = analyze_slippage('/Users/rayxu/Downloads/order.arbitrage_eth_okx_binance_09_2 (9).csv')
len(df[df['slippage']>=0])/len(df)

In [None]:
# Run the slippage analysis on the "_1103" ETH export; only the processed frame is kept.
_,_,_,df = analyze_slippage('/Users/rayxu/Downloads/order.arbitrage_eth_okx_binance_09_2_1103.csv')

In [None]:
# Load the BTC depth data and add a `beijing_time` column: the `T` column parsed
# as datetimes, localized as UTC and converted to Asia/Shanghai.
# NOTE(review): tz_localize('UTC') presumes `T` holds timezone-naive UTC values - confirm.
cf_depth_BTC = pd.read_csv('/Users/rayxu/Desktop/Obentech/cf_depth_BTC.csv')
beijing_time = pd.to_datetime(cf_depth_BTC['T']).dt.tz_localize('UTC').dt.tz_convert('Asia/Shanghai')
cf_depth_BTC['beijing_time'] = beijing_time


In [None]:
# Recompute the `beijing_time` column from `T` (UTC -> Asia/Shanghai) on an
# already loaded `cf_depth_BTC`; the same two statements also appear in the
# cell that loads the depth CSV.
beijing_time = pd.to_datetime(cf_depth_BTC['T']).dt.tz_localize('UTC').dt.tz_convert('Asia/Shanghai')
cf_depth_BTC['beijing_time'] = beijing_time

In [None]:
# Load the raw BTC order export and select the row(s) of one order ID.
# NOTE(review): the ID is compared as a float; see whether OrderID should be
# read as an integer or string to avoid float64 rounding.
df = pd.read_csv('/Users/rayxu/Downloads/order.arbitrage_btc_okx_binance_09_2.csv')
df[df.OrderID == 2673679677252673536.0]

In [None]:
# Read the BTC order export and take the row labelled 3673.
# The bare float literal on the last line is the cell's final expression, so
# when run as a notebook cell it is the literal - not the row - that is echoed.
pd.read_csv('/Users/rayxu/Downloads/order.arbitrage_btc_okx_binance_09_2.csv').loc[3673]
2673679677252673536.0

### BTC09

In [None]:
# Run the slippage analysis on the BTC export for rows after 2025-07-10 09:00:00
# and print the overall statistics dict. `processed_df` is the processed frame
# used by the merge cell that follows.
file_path = '/Users/rayxu/Downloads/order.arbitrage_btc_okx_binance_09_2 (1).csv'
stats, sell_stats, buy_stats, processed_df = analyze_slippage(file_path, starttime='2025-07-10 09:00:00')
print("\nOverall slippage statistics:")
print(stats)

In [None]:
# Attach mid-price return features to each order: join `processed_df` (keyed on
# Createtime) with the depth data (keyed on beijing_time), keeping the depth
# columns ret_mid_{1,5,10,30,60}.0s and logret_mid_{1,10,30,60}.0s.
processed_df['Createtime'] = pd.to_datetime(processed_df['Createtime'])
cf_depth_BTC = cf_depth_BTC[['beijing_time', 'ret_mid_1.0s', 'logret_mid_1.0s', 'ret_mid_10.0s','ret_mid_5.0s','logret_mid_10.0s','ret_mid_30.0s', 'logret_mid_30.0s', 'ret_mid_60.0s','logret_mid_60.0s']]
cf_depth_BTC = cf_depth_BTC.dropna()
cf_depth_BTC = cf_depth_BTC.drop_duplicates(subset=['beijing_time'])

# Both merge keys must be tz-aware and in the same timezone (Asia/Shanghai).
if processed_df['Createtime'].dt.tz is None:
    processed_df['Createtime'] = processed_df['Createtime'].dt.tz_localize('Asia/Shanghai')
else:
    processed_df['Createtime'] = processed_df['Createtime'].dt.tz_convert('Asia/Shanghai')

# merge_asof requires both sides to be sorted by their key.
cf_depth = cf_depth_BTC.sort_values('beijing_time')
processed_df = processed_df.sort_values('Createtime')

# Backward as-of merge: each order gets the latest depth row at or before its
# Createtime. The sorted frame `cf_depth` is the one passed in; previously the
# unsorted `cf_depth_BTC` was used and the sort above had no effect.
merged_df = pd.merge_asof(
    processed_df.reset_index(),
    cf_depth,
    left_on='Createtime',
    right_on='beijing_time',
    direction='backward'
)

# Display the (now time-sorted) order frame.
processed_df 

In [None]:
# # 先定义正确的分档区间，区间必须单调递增
# # 档位：slippage > -0.0001, -0.0001 >= slippage > -0.0003, -0.0003 >= slippage > -0.0005, slippage <= -0.0005
# # 所以bins应该是 [float('-inf'), -0.0005, -0.0003, -0.0001, float('inf')]
# # labels顺序要和bins顺序一致
# slippage_bins = [float('-inf'), -0.0005, -0.0003, -0.0001, float('inf')]
# slippage_labels = ['<= -0.0005', '-0.0005 ~ -0.0003', '-0.0003 ~ -0.0001', '> -0.0001']
# merged_df['slippage_bin'] = pd.cut(merged_df['slippage'], bins=slippage_bins, labels=slippage_labels, right=True, include_lowest=True, ordered=True)

# # Mean of ret_mid_1.0s, ret_mid_10.0s, ret_mid_30.0s and ret_mid_60.0s within each slippage bin (the logret columns are not part of `cols`)
# cols = ['ret_mid_1.0s', 'ret_mid_10.0s',  'ret_mid_30.0s', 'ret_mid_60.0s']
# slippage_stats = merged_df.groupby('slippage_bin')[cols].mean()

# # 画均值图
# import matplotlib.pyplot as plt

# # 统计每档的ret的绝对值均值
# abs_cols = [c for c in cols if c.startswith('ret_mid_') or c.startswith('logret_mid_')]
# for c in abs_cols:
#     merged_df[f'abs_{c}'] = merged_df[c].abs()
# abs_cols_abs = [f'abs_{c}' for c in abs_cols]
# slippage_stats_abs = merged_df.groupby('slippage_bin')[abs_cols_abs].mean()

# # 画绝对值均值图
# fig, ax = plt.subplots(figsize=(20, 8))
# slippage_stats_abs.plot(kind='bar', ax=ax)
# plt.title('Mean absolute returns (and log returns) by slippage bin')
# plt.ylabel('Mean absolute value')
# plt.xlabel('Slippage bin')
# plt.xticks(rotation=0)
# plt.grid(axis='y')
# plt.tight_layout()
# plt.show()

In [None]:
# # 先定义正确的分档区间，区间必须单调递增
# # 档位：slippage > -0.0001, -0.0001 >= slippage > -0.0003, -0.0003 >= slippage > -0.0005, slippage <= -0.0005
# # 所以bins应该是 [float('-inf'), -0.0005, -0.0003, -0.0001, float('inf')]
# # labels顺序要和bins顺序一致
# slippage_bins = [float('-inf'), -0.0005, -0.0003, -0.0001, float('inf')]
# slippage_labels = ['<= -0.0005', '-0.0005 ~ -0.0003', '-0.0003 ~ -0.0001', '> -0.0001']
# merged_df['slippage_bin'] = pd.cut(merged_df['slippage'], bins=slippage_bins, labels=slippage_labels, right=True, include_lowest=True, ordered=True)

# # Mean of ret_mid_1.0s, ret_mid_10.0s, ret_mid_30.0s and ret_mid_60.0s within each slippage bin (the logret columns are not part of `cols`)
# cols = ['ret_mid_1.0s', 'ret_mid_10.0s', 'ret_mid_30.0s', 'ret_mid_60.0s']
# slippage_stats = merged_df.groupby('slippage_bin')[cols].mean()

# # 画图
# import matplotlib.pyplot as plt

# fig, ax = plt.subplots(figsize=(20, 8))
# slippage_stats.plot(kind='bar', ax=ax)
# plt.title('Mean returns by slippage bin')
# plt.ylabel('Mean value')
# plt.xlabel('Slippage bin')
# plt.xticks(rotation=0)
# plt.grid(axis='y')
# plt.tight_layout()
# plt.show()

In [None]:
# Rows of the processed frame whose slippage is below -0.001.
processed_df[processed_df.slippage < -0.001]

In [None]:
# # 先定义正确的分档区间，区间必须单调递增
# # 档位：slippage > -0.0001, -0.0001 >= slippage > -0.0003, -0.0003 >= slippage > -0.0005, slippage <= -0.0005
# # 所以bins应该是 [float('-inf'), -0.0005, -0.0003, -0.0001, float('inf')]
# # labels顺序要和bins顺序一致
# slippage_bins = [float('-inf'), -0.0005, -0.0003, -0.0001, float('inf')]
# slippage_labels = ['<= -0.0005', '-0.0005 ~ -0.0003', '-0.0003 ~ -0.0001', '> -0.0001']
# merged_df['slippage_bin'] = pd.cut(merged_df['slippage'], bins=slippage_bins, labels=slippage_labels, right=True, include_lowest=True, ordered=True)

# # Build the signed direction: sell is 1, everything else is -1 (as the lambda on the next line does)
# merged_df['signed'] = merged_df['Side'].apply(lambda x: 1 if x == 'sell' else -1)

# # 新建signed_ret_mid_1.0s, signed_ret_mid_10.0s, signed_ret_mid_30.0s, signed_ret_mid_60.0s
# for col in ['ret_mid_1.0s', 'ret_mid_10.0s', 'ret_mid_30.0s', 'ret_mid_60.0s']:
#     merged_df[f'signed_{col}'] = merged_df[col] * merged_df['signed']

# # 统计每档的signed_ret_mid_1.0s, signed_ret_mid_10.0s, signed_ret_mid_30.0s, signed_ret_mid_60.0s的平均值
# signed_cols = [f'signed_ret_mid_{t}' for t in ['1.0s', '10.0s', '30.0s', '60.0s']]
# slippage_stats_signed = merged_df.groupby('slippage_bin')[signed_cols].mean()

# # 画图
# import matplotlib.pyplot as plt

# fig, ax = plt.subplots(figsize=(20, 8))
# slippage_stats_signed.plot(kind='bar', ax=ax)
# plt.title('Mean signed returns by slippage bin')
# plt.ylabel('Mean signed value')
# plt.xlabel('Slippage bin')
# plt.xticks(rotation=0)
# plt.grid(axis='y')
# plt.tight_layout()
# plt.show()

In [None]:
merged_df

In [None]:
# Descriptive statistics of slippage for buy orders whose ret_mid_5.0s is negative.
merged_df[(merged_df.Side == 'buy') & (merged_df['ret_mid_5.0s'] < 0)]['slippage'].describe()

In [None]:
# Scratch arithmetic: sum of four value*count products (not divided by the total count).
# NOTE(review): the operands look like group means and group sizes copied by hand - confirm their source.
(224*0.000226+0.000197*93+0.000177*155+0.000178*118)

In [None]:
# Scratch arithmetic: count-weighted mean of 0.000197 (weight 93) and 0.000178 (weight 118).
(0.000197*93+0.000178*118)/(93+118)

In [None]:
# Scratch arithmetic: count-weighted mean of 0.000226 (weight 224) and 0.000177 (weight 155).
(0.000226*224+0.000177*155)/(224+155)

In [None]:
# Scratch arithmetic: count-weighted mean of 0.000171 (weight 245) and 0.000192 (weight 191).
(245*0.000171+191*0.000192)/(245+191)

In [None]:
# Scratch arithmetic: count-weighted mean of 0.000203 (weight 167) and 0.000208 (weight 142).
(167*0.000203+142*0.000208)/(167+142)

In [None]:
# Two count-weighted means written out term by term (weights 118/155, then 224/93).
# Only the second expression is the cell's final expression; the value of the
# first one is computed and discarded.
# NOTE(review): 0.000197 is weighted by 155 here but by 93 in the other scratch
# cells - check whether 0.000177 was intended in the first expression.
118*0.000178/(118+155)+(0.000197)*155/(118+155)

224*0.000226/(224+93)+(0.000197)*93/(224+93)

In [None]:
import pandas as pd

# Forward-return horizons in seconds; horizon t reads column `ret_mid_{t}.0s`.
time_windows = [1, 5, 10, 30, 60]

# Boolean row masks keyed by (side, horizon, return-sign label).
conditions = {}
for side in ['Sell', 'Buy']:
    # Side is matched case-insensitively.
    side_mask = merged_df.Side.str.lower() == side.lower()
    for t in time_windows:
        col = f'ret_mid_{t}.0s'
        conditions[(side, t, f'Ret{t}s > 0')] = side_mask & (merged_df[col] > 0)
        conditions[(side, t, f'Ret{t}s < 0')] = side_mask & (merged_df[col] < 0)

# Mean slippage and non-null row count for every group.
results = []
for (side, t, ret), cond in conditions.items():
    slippage = merged_df.loc[cond, 'slippage']
    results.append({'方向': side, '窗口': t, '条件': ret, '均值': slippage.mean(), '数量': slippage.count()})

df_result = pd.DataFrame(results)

# Wide view: one row per side, columns keyed by (statistic, horizon, condition).
pivot = df_result.pivot(index=['方向'], columns=['窗口', '条件'], values=['均值', '数量'])

# (mean, count) per group, so each group is looked up once instead of
# re-filtering df_result for every use.
group_stats = {(r['方向'], r['窗口'], r['条件']): (r['均值'], r['数量']) for r in results}


def _pooled_mean(*keys):
    """Return the count-weighted mean slippage over the given groups.

    Empty groups are skipped: their mean is NaN, and NaN * 0 would otherwise
    turn the whole weighted average into NaN even when the other group has
    data. NaN is returned only when every group is empty.
    """
    total = sum(group_stats[key][1] for key in keys)
    if total == 0:
        return float('nan')
    weighted = sum(m * n for m, n in (group_stats[key] for key in keys) if n > 0)
    return weighted / total


# For each horizon, pool the two pairs:
#   Sell & Ret > 0 with Buy & Ret < 0, and Sell & Ret < 0 with Buy & Ret > 0.
weighted_means_pair = {}
for t in time_windows:
    weighted_means_pair[('Sell>0+Buy<0', t)] = _pooled_mean(
        ('Sell', t, f'Ret{t}s > 0'), ('Buy', t, f'Ret{t}s < 0'))
    weighted_means_pair[('Sell<0+Buy>0', t)] = _pooled_mean(
        ('Sell', t, f'Ret{t}s < 0'), ('Buy', t, f'Ret{t}s > 0'))

# Print the table: one row per side, "mean(count)" for each horizon/sign.
print("BTC (对冲, 7/10-7/15实盘数据)")
header = "         " + "   ".join([f"Ret{t}s > 0      Ret{t}s < 0" for t in time_windows])
print(header)
for side in ['Sell', 'Buy']:
    row = []
    for t in time_windows:
        for cond in [f'Ret{t}s > 0', f'Ret{t}s < 0']:
            mean, count = group_stats[(side, t, cond)]
            row.append(f"{mean:.6f}({int(count)})")
    print(f"方向{side:<4} {'   '.join(row)}")
print("数量加权(配对: Sell&Ret>0+Buy&Ret<0 | Sell&Ret<0+Buy&Ret>0)")
weighted_row1 = [f"{weighted_means_pair[('Sell>0+Buy<0', t)]:.6f}" for t in time_windows]
weighted_row2 = [f"{weighted_means_pair[('Sell<0+Buy>0', t)]:.6f}" for t in time_windows]
print("Sell>0+Buy<0:   " + "   ".join(weighted_row1))
print("Sell<0+Buy>0:   " + "   ".join(weighted_row2))

In [None]:
weighted_means

In [None]:
merged_df[merged_df['signed_ret_mid_30.0s'] < 0]['slippage'].describe()

In [None]:
merged_df[merged_df['signed_ret_mid_30.0s'] < -0.0008]

In [None]:
merged_df[merged_df['signed_ret_mid_30.0s'] > 0.0015]['slippage'].describe()

In [None]:
merged_df[merged_df['signed_ret_mid_30.0s'] > -0.0015]['slippage'].describe()

In [None]:
merged_df[merged_df['signed_ret_mid_30.0s'] < -0.0015]['slippage'].describe()

In [None]:
merged_df[merged_df['signed_ret_mid_10.0s'] < -0.0015]['slippage'].describe()

In [None]:
merged_df[(merged_df.Side == 'sell') & (merged_df['ret_mid_5.0s'] < 0)]['slippage'].describe()

In [None]:
merged_df[(merged_df.Side == 'sell') & (merged_df['ret_mid_5.0s'] > 0)]['slippage'].describe()

In [None]:
merged_df[(merged_df.Side == 'sell') & (merged_df['ret_mid_10.0s'] > 0)]['slippage'].describe()

In [None]:
merged_df[merged_df['signed_ret_mid_10.0s'] > 0]['slippage'].describe()

In [None]:
merged_df[merged_df['signed_ret_mid_10.0s'] < 0]['slippage'].describe()

In [None]:
(-2.029804e-04+1.961758e-04)/-1.961758e-04

In [None]:
# Fine-grained slippage bins: split [-0.001, 0] into 10 equal-width bins and add
# one open-ended bin on each side.
import numpy as np

num_bins = 10
# num_bins + 1 equally spaced edges from -0.001 to 0.
inner_bins = np.linspace(-0.001, 0, num_bins + 1)

# Open-ended outer edges catch everything below -0.001 and above 0.
slippage_bins = [float('-inf')] + list(inner_bins) + [float('inf')]

# One label per bin, in the same ascending order as the edges.
slippage_labels = (
    [f'<= {inner_bins[0]:.6f}']
    + [f'{left:.6f} ~ {right:.6f}' for left, right in zip(inner_bins[:-1], inner_bins[1:])]
    + [f'> {inner_bins[-1]:.6f}']
)

merged_df['slippage_bin'] = pd.cut(
    merged_df['slippage'],
    bins=slippage_bins,
    labels=slippage_labels,
    right=True,
    include_lowest=True,
    ordered=True
)

# Direction sign: -1 for 'sell', +1 for every other Side value.
merged_df['signed'] = merged_df['Side'].apply(lambda x: -1 if x == 'sell' else 1)

# Direction-signed forward returns for each horizon.
for col in ['ret_mid_1.0s','ret_mid_5.0s', 'ret_mid_10.0s', 'ret_mid_30.0s', 'ret_mid_60.0s']:
    merged_df[f'signed_{col}'] = merged_df[col] * merged_df['signed']

# Mean signed return per slippage bin. observed=False keeps empty bins as rows,
# so there is always exactly one row per label, matching bin_counts below.
signed_cols = [f'signed_ret_mid_{t}' for t in ['1.0s', '5.0s', '10.0s', '30.0s', '60.0s']]
slippage_stats_signed = merged_df.groupby('slippage_bin', observed=False)[signed_cols].mean()

# Number of rows in each bin, in bin order.
bin_counts = merged_df['slippage_bin'].value_counts().sort_index()

# Plot
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(20, 8))
slippage_stats_signed.plot(kind='bar', ax=ax)
plt.title('Mean signed returns by slippage bin')
plt.ylabel('Mean signed value')
plt.xlabel('Slippage bin')
plt.xticks(rotation=45)
plt.grid(axis='y')
plt.tight_layout()

# Annotate every bin with its row count, just above the tallest bar of the bin.
# Rows are looked up by label rather than by position, and empty bins (all-NaN
# means) are anchored at 0 so their "n=0" label is still drawn.
for i, (label, count) in enumerate(bin_counts.items()):
    y_bin = slippage_stats_signed.loc[label].max()
    if pd.isna(y_bin):
        y_bin = 0.0
    ax.text(i, y_bin + 0.00002, f'n={count}', ha='center', va='bottom', fontsize=12, rotation=90)

plt.show()

In [None]:
abs(merged_df['ret_mid_30.0s']).describe()

In [None]:
merged_df[merged_df['signed_ret_mid_30.0s'] > 0.0004]['slippage'].describe()

In [None]:
merged_df['slippage'].describe()

In [None]:
merged_df[merged_df['signed_ret_mid_30.0s'] < -0.0004]['slippage'].describe()

In [None]:
merged_df[merged_df['signed_ret_mid_30.0s'] < 0]['slippage'].describe()

In [None]:
merged_df[merged_df['signed_ret_mid_30.0s'] > 0]['slippage'].describe()

In [None]:
merged_df[merged_df['signed_ret_mid_30.0s'] < 0 ]['slippage'].describe()

In [None]:
merged_df[(merged_df.slippage > -0.00001) & (merged_df.Side == 'buy')]

In [None]:
df = pd.read_csv('/Users/rayxu/Downloads/order.arbitrage_btc_okx_binance_09_2.csv')
# Alternative input file, kept for reference (a bare path is not valid Python):
# /Users/rayxu/Downloads/order.arbitrage_goat_okx_binance_99_2.csv

# Drop rows whose Order2FilledPrice is 0. .copy() makes the result an independent
# frame so the column assignments below do not write into a slice of the original.
df = df[df.Order2FilledPrice != 0].copy()
# Ratio of Price to Order2FilledPrice, minus 1.
df['SR'] = df['Price'] / df['Order2FilledPrice'] - 1
# Difference between SR and the ESR column.
# NOTE(review): ESR is presumably the expected value of SR; confirm against the data source.
df['slippage'] = df['SR'] - df['ESR']
# Flip the sign for non-sell orders: +1 for 'sell', -1 for every other Side value.
df['sign'] = df['Side'].apply(lambda x: 1 if x == 'sell' else -1)
df['slippage'] = df['slippage'] * df['sign']
df['slippage'].describe()

In [None]:
# Histogram, mean and standard deviation of slippage for sell orders.
sell_slippage = df.loc[df.Side == 'sell', 'slippage']
sell_slippage.hist(figsize=(12, 8))
print(sell_slippage.mean())
print(sell_slippage.std())

In [None]:
# Histogram, mean and standard deviation of slippage for buy orders.
buy_slippage = df.loc[df.Side == 'buy', 'slippage']
buy_slippage.hist(figsize=(12, 8))
print(buy_slippage.mean())
print(buy_slippage.std())

In [None]:
df[(df.Side=='buy')& (df.slippage>0)]

### BTC08

In [None]:
# 使用示例
file_path = '/Users/rayxu/Downloads/order.arbitrage_btc_okx_binance_08_2 (2).csv'
stats, sell_stats, buy_stats, processed_df = analyze_slippage(file_path)
print("\nOverall slippage statistics:")
print(stats)

In [None]:
# 筛选大于7/10的processed_df
processed_df['Createtime'] = pd.to_datetime(processed_df['Createtime'])
processed_df = processed_df[processed_df.Createtime > pd.Timestamp('2025-07-10')]

In [None]:
# Attach the forward mid-price return columns of cf_depth to every order in
# processed_df, matching processed_df.Createtime against cf_depth.beijing_time.

# Createtime must be datetime-typed and non-null for merge_asof.
processed_df['Createtime'] = pd.to_datetime(processed_df['Createtime'])
processed_df = processed_df.dropna(subset=['Createtime']).copy()

# Keep only the time key and the return columns; drop incomplete rows and
# duplicated timestamps.
depth_cols = ['beijing_time', 'ret_mid_1.0s', 'logret_mid_1.0s', 'ret_mid_10.0s', 'logret_mid_10.0s','ret_mid_5.0s',
              'ret_mid_30.0s', 'logret_mid_30.0s', 'ret_mid_60.0s', 'logret_mid_60.0s']
cf_depth = cf_depth[depth_cols].dropna().drop_duplicates(subset=['beijing_time'])


def _as_shanghai(times):
    """Localize naive timestamps to Asia/Shanghai, or convert tz-aware ones to it."""
    if times.dt.tz is None:
        return times.dt.tz_localize('Asia/Shanghai')
    return times.dt.tz_convert('Asia/Shanghai')


# Both merge keys must be tz-aware and in the same timezone.
processed_df['Createtime'] = _as_shanghai(processed_df['Createtime'])
cf_depth['beijing_time'] = _as_shanghai(pd.to_datetime(cf_depth['beijing_time']))

# merge_asof requires both frames to be sorted by their key.
cf_depth = cf_depth.sort_values('beijing_time')
processed_df = processed_df.sort_values('Createtime')

# direction='backward': each order takes the most recent cf_depth row whose
# beijing_time is at or before the order's Createtime.
merged_df = pd.merge_asof(
    processed_df.reset_index(),
    cf_depth,
    left_on='Createtime',
    right_on='beijing_time',
    direction='backward'
)



In [None]:
merged_df

In [None]:
merged_df[(merged_df.Side == 'sell') & (merged_df['ret_mid_10.0s'] > 0)]['slippage'].describe()

In [None]:
# Coarse slippage bins (edges must be strictly increasing):
#   slippage <= -0.0005 | (-0.0005, -0.0003] | (-0.0003, -0.0001] | slippage > -0.0001
# Labels are listed in the same order as the bins.
slippage_bins = [float('-inf'), -0.0005, -0.0003, -0.0001, float('inf')]
slippage_labels = ['<= -0.0005', '-0.0005 ~ -0.0003', '-0.0003 ~ -0.0001', '> -0.0001']
merged_df['slippage_bin'] = pd.cut(
    merged_df['slippage'],
    bins=slippage_bins,
    labels=slippage_labels,
    right=True,
    include_lowest=True,
    ordered=True,
)

# Direction sign: -1 for 'sell', +1 for every other Side value.
merged_df['signed'] = merged_df['Side'].map(lambda side: 1 if side != 'sell' else -1)

# Direction-signed forward returns for each horizon.
for horizon in ['1.0s', '5.0s', '10.0s', '30.0s', '60.0s']:
    merged_df[f'signed_ret_mid_{horizon}'] = merged_df[f'ret_mid_{horizon}'] * merged_df['signed']

# Mean signed return per bin for the 1s / 10s / 30s / 60s horizons.
signed_cols = ['signed_ret_mid_1.0s', 'signed_ret_mid_10.0s', 'signed_ret_mid_30.0s', 'signed_ret_mid_60.0s']
slippage_stats_signed = merged_df.groupby('slippage_bin')[signed_cols].mean()

# Grouped bar chart: one group per slippage bin, one bar per horizon.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(20, 8))
slippage_stats_signed.plot(kind='bar', ax=ax)
ax.set_title('Mean signed returns by slippage bin')
ax.set_ylabel('Mean signed value')
ax.set_xlabel('Slippage bin')
plt.xticks(rotation=0)
ax.grid(axis='y')
fig.tight_layout()
plt.show()

In [None]:
# Fine-grained slippage bins: split [-0.001, 0] into 10 equal-width bins and add
# one open-ended bin on each side.
import numpy as np

num_bins = 10
# num_bins + 1 equally spaced edges from -0.001 to 0.
inner_bins = np.linspace(-0.001, 0, num_bins + 1)

# Open-ended outer edges catch everything below -0.001 and above 0.
slippage_bins = [float('-inf')] + list(inner_bins) + [float('inf')]

# One label per bin, in the same ascending order as the edges.
slippage_labels = (
    [f'<= {inner_bins[0]:.6f}']
    + [f'{left:.6f} ~ {right:.6f}' for left, right in zip(inner_bins[:-1], inner_bins[1:])]
    + [f'> {inner_bins[-1]:.6f}']
)

merged_df['slippage_bin'] = pd.cut(
    merged_df['slippage'],
    bins=slippage_bins,
    labels=slippage_labels,
    right=True,
    include_lowest=True,
    ordered=True
)

# Direction sign: -1 for 'sell', +1 for every other Side value.
merged_df['signed'] = merged_df['Side'].apply(lambda x: -1 if x == 'sell' else 1)

# Direction-signed forward returns for each horizon.
for col in ['ret_mid_1.0s', 'ret_mid_5.0s', 'ret_mid_10.0s', 'ret_mid_30.0s', 'ret_mid_60.0s']:
    merged_df[f'signed_{col}'] = merged_df[col] * merged_df['signed']

# Mean signed return per slippage bin. observed=False keeps empty bins as rows,
# so there is always exactly one row per label, matching bin_counts below.
signed_cols = [f'signed_ret_mid_{t}' for t in ['1.0s', '5.0s', '10.0s', '30.0s', '60.0s']]
slippage_stats_signed = merged_df.groupby('slippage_bin', observed=False)[signed_cols].mean()

# Number of rows in each bin, in bin order.
bin_counts = merged_df['slippage_bin'].value_counts().sort_index()

# Plot
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(20, 8))
slippage_stats_signed.plot(kind='bar', ax=ax)
plt.title('Mean signed returns by slippage bin')
plt.ylabel('Mean signed value')
plt.xlabel('Slippage bin')
plt.xticks(rotation=45)
plt.grid(axis='y')
plt.tight_layout()

# Annotate every bin with its row count, just above the tallest bar of the bin.
# Rows are looked up by label rather than by position, and empty bins (all-NaN
# means) are anchored at 0 so their "n=0" label is still drawn.
for i, (label, count) in enumerate(bin_counts.items()):
    y_bin = slippage_stats_signed.loc[label].max()
    if pd.isna(y_bin):
        y_bin = 0.0
    ax.text(i, y_bin + 0.00002, f'n={count}', ha='center', va='bottom', fontsize=12, rotation=90)

plt.show()

In [None]:
merged_df[merged_df['signed_ret_mid_30.0s'] > 0.000]['slippage'].describe()

In [None]:
merged_df['signed_ret_mid_30.0s'].describe()

In [None]:
merged_df[merged_df['signed_ret_mid_30.0s'] < -0.0003]['slippage'].describe()

In [None]:
merged_df[merged_df['signed_ret_mid_30.0s'] > 0.0003]['slippage'].describe()

In [None]:
# Coarse slippage bins (edges must be strictly increasing):
#   slippage <= -0.0005 | (-0.0005, -0.0003] | (-0.0003, -0.0001] | slippage > -0.0001
# Labels are listed in the same order as the bins.
slippage_bins = [float('-inf'), -0.0005, -0.0003, -0.0001, float('inf')]
slippage_labels = ['<= -0.0005', '-0.0005 ~ -0.0003', '-0.0003 ~ -0.0001', '> -0.0001']
merged_df['slippage_bin'] = pd.cut(
    merged_df['slippage'],
    bins=slippage_bins,
    labels=slippage_labels,
    right=True,
    include_lowest=True,
    ordered=True,
)

# Mean unsigned forward return per bin for the 1s / 10s / 30s / 60s horizons.
cols = [f'ret_mid_{horizon}' for horizon in ('1.0s', '10.0s', '30.0s', '60.0s')]
slippage_stats = merged_df.groupby('slippage_bin')[cols].mean()

# Grouped bar chart: one group per slippage bin, one bar per horizon.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(20, 8))
slippage_stats.plot(kind='bar', ax=ax)
ax.set_title('Mean returns by slippage bin')
ax.set_ylabel('Mean value')
ax.set_xlabel('Slippage bin')
plt.xticks(rotation=0)
ax.grid(axis='y')
fig.tight_layout()
plt.show()

In [None]:
# Coarse slippage bins (edges must be strictly increasing):
#   slippage <= -0.0005 | (-0.0005, -0.0003] | (-0.0003, -0.0001] | slippage > -0.0001
# Labels are listed in the same order as the bins.
slippage_bins = [float('-inf'), -0.0005, -0.0003, -0.0001, float('inf')]
slippage_labels = ['<= -0.0005', '-0.0005 ~ -0.0003', '-0.0003 ~ -0.0001', '> -0.0001']
merged_df['slippage_bin'] = pd.cut(
    merged_df['slippage'],
    bins=slippage_bins,
    labels=slippage_labels,
    right=True,
    include_lowest=True,
    ordered=True,
)

# Direction sign: -1 for 'sell', +1 for every other Side value.
merged_df['signed'] = merged_df['Side'].map(lambda side: 1 if side != 'sell' else -1)

# Direction-signed forward returns for each horizon.
for horizon in ['1.0s', '10.0s', '30.0s', '60.0s']:
    merged_df[f'signed_ret_mid_{horizon}'] = merged_df[f'ret_mid_{horizon}'] * merged_df['signed']

# Mean signed return per bin for every horizon.
signed_cols = ['signed_ret_mid_1.0s', 'signed_ret_mid_10.0s', 'signed_ret_mid_30.0s', 'signed_ret_mid_60.0s']
slippage_stats_signed = merged_df.groupby('slippage_bin')[signed_cols].mean()

# Grouped bar chart: one group per slippage bin, one bar per horizon.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(20, 8))
slippage_stats_signed.plot(kind='bar', ax=ax)
ax.set_title('Mean signed returns by slippage bin')
ax.set_ylabel('Mean signed value')
ax.set_xlabel('Slippage bin')
plt.xticks(rotation=0)
ax.grid(axis='y')
fig.tight_layout()
plt.show()

In [None]:
# Fine-grained slippage bins: split [-0.001, 0] into 10 equal-width bins and add
# one open-ended bin on each side.
import numpy as np

num_bins = 10
# num_bins + 1 equally spaced edges from -0.001 to 0.
inner_bins = np.linspace(-0.001, 0, num_bins + 1)

# Open-ended outer edges catch everything below -0.001 and above 0.
slippage_bins = [float('-inf')] + list(inner_bins) + [float('inf')]

# One label per bin, in the same ascending order as the edges.
slippage_labels = (
    [f'<= {inner_bins[0]:.6f}']
    + [f'{left:.6f} ~ {right:.6f}' for left, right in zip(inner_bins[:-1], inner_bins[1:])]
    + [f'> {inner_bins[-1]:.6f}']
)

merged_df['slippage_bin'] = pd.cut(
    merged_df['slippage'],
    bins=slippage_bins,
    labels=slippage_labels,
    right=True,
    include_lowest=True,
    ordered=True
)

# Direction sign: -1 for 'sell', +1 for every other Side value.
merged_df['signed'] = merged_df['Side'].apply(lambda x: -1 if x == 'sell' else 1)

# Direction-signed forward returns for each horizon.
for col in ['ret_mid_1.0s', 'ret_mid_10.0s', 'ret_mid_30.0s', 'ret_mid_60.0s']:
    merged_df[f'signed_{col}'] = merged_df[col] * merged_df['signed']

# Mean signed return per slippage bin. observed=False keeps empty bins as rows,
# so there is always exactly one row per label, matching bin_counts below.
signed_cols = [f'signed_ret_mid_{t}' for t in ['1.0s', '10.0s', '30.0s', '60.0s']]
slippage_stats_signed = merged_df.groupby('slippage_bin', observed=False)[signed_cols].mean()

# Number of rows in each bin, in bin order.
bin_counts = merged_df['slippage_bin'].value_counts().sort_index()

# Plot
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(20, 8))
slippage_stats_signed.plot(kind='bar', ax=ax)
plt.title('Mean signed returns by slippage bin')
plt.ylabel('Mean signed value')
plt.xlabel('Slippage bin')
plt.xticks(rotation=45)
plt.grid(axis='y')
plt.tight_layout()

# Annotate every bin with its row count, just above the tallest bar of the bin.
# Rows are looked up by label rather than by position, and empty bins (all-NaN
# means) are anchored at 0 so their "n=0" label is still drawn.
for i, (label, count) in enumerate(bin_counts.items()):
    y_bin = slippage_stats_signed.loc[label].max()
    if pd.isna(y_bin):
        y_bin = 0.0
    ax.text(i, y_bin + 0.00002, f'n={count}', ha='center', va='bottom', fontsize=12, rotation=90)

plt.show()

In [None]:
processed_df[processed_df.slippage < -0.001]

In [None]:
processed_df[processed_df.OrderID == 2672731276698378240.0]

In [None]:
processed_df[processed_df.slippage > 0].tail(80)

In [None]:
processed_df.tail(30)

In [None]:
df[(df.Side=='buy')& (df.slippage<-0.0008)]

### ETH08

In [None]:
# 使用示例
file_path = '/Users/rayxu/Downloads/order.arbitrage_eth_okx_binance_08_2 (1).csv'
stats, sell_stats, buy_stats, processed_df = analyze_slippage(file_path)
print("\nOverall slippage statistics:")
print(stats)

In [None]:
# Attach the forward mid-price return columns of cf_depth to every order in
# processed_df, matching processed_df.Createtime against cf_depth.beijing_time,
# then plot mean signed returns per coarse slippage bin.

# Createtime must be datetime-typed and non-null for merge_asof.
processed_df['Createtime'] = pd.to_datetime(processed_df['Createtime'])
processed_df = processed_df.dropna(subset=['Createtime']).copy()

# Keep only the time key and the return columns; drop incomplete rows and
# duplicated timestamps.
depth_cols = ['beijing_time', 'ret_mid_1.0s', 'logret_mid_1.0s', 'ret_mid_10.0s', 'logret_mid_10.0s',
              'ret_mid_30.0s', 'logret_mid_30.0s', 'ret_mid_60.0s', 'logret_mid_60.0s']
cf_depth = cf_depth[depth_cols].dropna().drop_duplicates(subset=['beijing_time'])


def _as_shanghai(times):
    """Localize naive timestamps to Asia/Shanghai, or convert tz-aware ones to it."""
    if times.dt.tz is None:
        return times.dt.tz_localize('Asia/Shanghai')
    return times.dt.tz_convert('Asia/Shanghai')


# Both merge keys must be tz-aware and in the same timezone.
processed_df['Createtime'] = _as_shanghai(processed_df['Createtime'])
cf_depth['beijing_time'] = _as_shanghai(pd.to_datetime(cf_depth['beijing_time']))

# merge_asof requires both frames to be sorted by their key.
cf_depth = cf_depth.sort_values('beijing_time')
processed_df = processed_df.sort_values('Createtime')

# direction='backward': each order takes the most recent cf_depth row whose
# beijing_time is at or before the order's Createtime.
merged_df = pd.merge_asof(
    processed_df.reset_index(),
    cf_depth,
    left_on='Createtime',
    right_on='beijing_time',
    direction='backward'
)

# Coarse slippage bins (edges must be strictly increasing):
#   slippage <= -0.0005 | (-0.0005, -0.0003] | (-0.0003, -0.0001] | slippage > -0.0001
# Labels are listed in the same order as the bins.
slippage_bins = [float('-inf'), -0.0005, -0.0003, -0.0001, float('inf')]
slippage_labels = ['<= -0.0005', '-0.0005 ~ -0.0003', '-0.0003 ~ -0.0001', '> -0.0001']
merged_df['slippage_bin'] = pd.cut(
    merged_df['slippage'],
    bins=slippage_bins,
    labels=slippage_labels,
    right=True,
    include_lowest=True,
    ordered=True,
)

# Direction sign: -1 for 'sell', +1 for every other Side value.
merged_df['signed'] = merged_df['Side'].map(lambda side: 1 if side != 'sell' else -1)

# Direction-signed forward returns for each horizon.
for horizon in ['1.0s', '10.0s', '30.0s', '60.0s']:
    merged_df[f'signed_ret_mid_{horizon}'] = merged_df[f'ret_mid_{horizon}'] * merged_df['signed']

# Mean signed return per bin for every horizon.
signed_cols = ['signed_ret_mid_1.0s', 'signed_ret_mid_10.0s', 'signed_ret_mid_30.0s', 'signed_ret_mid_60.0s']
slippage_stats_signed = merged_df.groupby('slippage_bin')[signed_cols].mean()

# Grouped bar chart: one group per slippage bin, one bar per horizon.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(20, 8))
slippage_stats_signed.plot(kind='bar', ax=ax)
ax.set_title('Mean signed returns by slippage bin')
ax.set_ylabel('Mean signed value')
ax.set_xlabel('Slippage bin')
plt.xticks(rotation=0)
ax.grid(axis='y')
fig.tight_layout()
plt.show()

In [None]:
# Fine-grained slippage bins: split [-0.001, 0] into 10 equal-width bins and add
# one open-ended bin on each side.
import numpy as np

num_bins = 10
# num_bins + 1 equally spaced edges from -0.001 to 0.
inner_bins = np.linspace(-0.001, 0, num_bins + 1)

# Open-ended outer edges catch everything below -0.001 and above 0.
slippage_bins = [float('-inf')] + list(inner_bins) + [float('inf')]

# One label per bin, in the same ascending order as the edges.
slippage_labels = (
    [f'<= {inner_bins[0]:.6f}']
    + [f'{left:.6f} ~ {right:.6f}' for left, right in zip(inner_bins[:-1], inner_bins[1:])]
    + [f'> {inner_bins[-1]:.6f}']
)

merged_df['slippage_bin'] = pd.cut(
    merged_df['slippage'],
    bins=slippage_bins,
    labels=slippage_labels,
    right=True,
    include_lowest=True,
    ordered=True
)

# Direction sign: -1 for 'sell', +1 for every other Side value.
merged_df['signed'] = merged_df['Side'].apply(lambda x: -1 if x == 'sell' else 1)

# Direction-signed forward returns for each horizon.
for col in ['ret_mid_1.0s', 'ret_mid_10.0s', 'ret_mid_30.0s', 'ret_mid_60.0s']:
    merged_df[f'signed_{col}'] = merged_df[col] * merged_df['signed']

# Mean signed return per slippage bin. observed=False keeps empty bins as rows,
# so there is always exactly one row per label, matching bin_counts below.
signed_cols = [f'signed_ret_mid_{t}' for t in ['1.0s', '10.0s', '30.0s', '60.0s']]
slippage_stats_signed = merged_df.groupby('slippage_bin', observed=False)[signed_cols].mean()

# Number of rows in each bin, in bin order.
bin_counts = merged_df['slippage_bin'].value_counts().sort_index()

# Plot
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(20, 8))
slippage_stats_signed.plot(kind='bar', ax=ax)
plt.title('Mean signed returns by slippage bin')
plt.ylabel('Mean signed value')
plt.xlabel('Slippage bin')
plt.xticks(rotation=45)
plt.grid(axis='y')
plt.tight_layout()

# Annotate every bin with its row count, just above the tallest bar of the bin.
# Rows are looked up by label rather than by position, and empty bins (all-NaN
# means) are anchored at 0 so their "n=0" label is still drawn.
for i, (label, count) in enumerate(bin_counts.items()):
    y_bin = slippage_stats_signed.loc[label].max()
    if pd.isna(y_bin):
        y_bin = 0.0
    ax.text(i, y_bin + 0.00002, f'n={count}', ha='center', va='bottom', fontsize=12, rotation=90)

plt.show()

In [None]:
processed_df[processed_df.slippage<-0.0008].tail(20)

In [None]:
df = pd.read_csv('/Users/rayxu/Downloads/order.arbitrage_kaito_okx_binance_99_2 (2).csv')

# Drop rows whose Order2FilledPrice is 0. .copy() makes the result an independent
# frame so the column assignments below do not write into a slice of the original.
df = df[df.Order2FilledPrice != 0].copy()
# Ratio of Price to Order2FilledPrice, minus 1.
df['SR'] = df['Price'] / df['Order2FilledPrice'] - 1
# Difference between SR and the ESR column.
# NOTE(review): ESR is presumably the expected value of SR; confirm against the data source.
df['slippage'] = df['SR'] - df['ESR']
# Flip the sign for non-sell orders: +1 for 'sell', -1 for every other Side value.
df['sign'] = df['Side'].apply(lambda x: 1 if x == 'sell' else -1)
# NOTE(review): the constant 0.00003 is added to every row; its meaning is not
# documented here (possibly a fee or rebate adjustment). Confirm before reuse.
df['slippage'] = df['slippage'] * df['sign'] + 0.00003
df['slippage'].mean()

In [None]:
df

# SOL08

In [None]:
# df = pd.read_csv('/Users/rayxu/Downloads/order.arbitrage_sol_okx_binance_08_2.csv')
# df = df[df.Order2FilledPrice!=0]
file_path = '/Users/rayxu/Downloads/order.arbitrage_sol_okx_binance_08_2.csv'
stats, sell_stats, buy_stats, processed_df = analyze_slippage(file_path)
print("\nOverall slippage statistics:")
print(stats)


In [None]:
processed_df['Createtime'] = pd.to_datetime(processed_df['Createtime'])
processed_df = processed_df[processed_df.Createtime > pd.Timestamp('2025-07-17')]

In [None]:
processed_df[processed_df.Side == 'buy']

In [None]:
processed_df[processed_df['Side']=='sell']['ESR'].mean()

In [None]:
processed_df[processed_df['Side']=='buy']['ESR'].mean()

## BTC 01

In [None]:
file_path = '/Users/rayxu/Downloads/order.btc_okx_binance_01_2.csv'
stats, sell_stats, buy_stats, processed_df = analyze_slippage(file_path, starttime='2025-07-31 13:31:39')
print("\nOverall slippage statistics:")
print(stats)

In [None]:
processed_df.sort_values(by='slippage',ascending=True)

In [None]:
file_path = '/Users/rayxu/Downloads/order.btc_okx_binance_02_2.csv'
stats, sell_stats, buy_stats, processed_df = analyze_slippage(file_path, starttime='2025-07-31 13:31:39')
print("\nOverall slippage statistics:")
print(stats)

In [None]:
processed_df.sort_values(by='Createtime',ascending=True).head(40)

# 实验结果对比

In [None]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
import os

def analyze_slippage_backtest(file_path):
    """
    Analyze the slippage distribution of a backtest log in JSON Lines format.

    Every non-blank line of the file is parsed as one JSON object. Records whose
    ``slippage`` field is present and not null are collected, summary statistics
    are printed (overall and per trade ``type``), and ``create_visualizations``
    is called to plot the histograms.

    Lines that are not valid JSON are reported and skipped. Lines that parse to
    something other than a JSON object are skipped. A record without a ``type``
    or ``volume`` field still contributes its slippage and is grouped under
    ``None`` instead of aborting the whole analysis.

    Args:
        file_path: Path to the JSON Lines file.

    Returns:
        dict of overall statistics (count, mean, median, std, min, max and the
        ``percentile_<q>`` entries), or None when the file does not exist or
        contains no usable slippage value.
    """
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        return None

    # Slippage values overall, per trade type and per volume.
    slippage_values = []
    slippage_by_type = defaultdict(list)
    slippage_by_volume = defaultdict(list)

    with open(file_path, 'r', encoding='utf-8') as f:
        for line_count, line in enumerate(f, start=1):
            # Blank lines (e.g. a trailing newline) are not parse errors.
            if not line.strip():
                continue

            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                print(f"Error parsing JSON at line {line_count}")
                continue

            # A bare number, string or array has no fields to read.
            if not isinstance(data, dict):
                continue

            slippage = data.get('slippage')
            if slippage is None:
                continue

            slippage_values.append(slippage)
            slippage_by_type[data.get('type')].append(slippage)
            slippage_by_volume[data.get('volume')].append(slippage)

    if not slippage_values:
        print("No valid slippage values found.")
        return None

    slippage_array = np.array(slippage_values)

    # Percentiles reported, in output order; computed in a single call.
    percentiles = (1, 5, 10, 25, 50, 55, 60, 65, 75, 95, 99)
    percentile_values = np.percentile(slippage_array, percentiles)

    stats = {
        'count': len(slippage_array),
        'mean': np.mean(slippage_array),
        'median': np.median(slippage_array),
        'std': np.std(slippage_array),
        'min': np.min(slippage_array),
        'max': np.max(slippage_array),
    }
    for q, value in zip(percentiles, percentile_values):
        stats[f'percentile_{q}'] = value

    print("Slippage Statistics:")
    print(f"Count: {stats['count']}")
    print(f"Mean: {stats['mean']:.8f}")
    print(f"Median: {stats['median']:.8f}")
    print(f"Standard Deviation: {stats['std']:.8f}")
    print(f"Min: {stats['min']:.8f}")
    print(f"Max: {stats['max']:.8f}")
    for q in percentiles:
        suffix = 'st' if q == 1 else 'th'
        value = stats[f'percentile_{q}']
        print(f"{q}{suffix} Percentile: {value:.8f}")

    print("\nSlippage by Trade Type:")
    for trade_type, values in slippage_by_type.items():
        values_array = np.array(values)
        print(f"{trade_type}:")
        print(f"  Count: {len(values_array)}")
        print(f"  Mean: {np.mean(values_array):.8f}")
        print(f"  Median: {np.median(values_array):.8f}")
        print(f"  Standard Deviation: {np.std(values_array):.8f}")

    create_visualizations(slippage_array, slippage_by_type, slippage_by_volume)

    return stats

def create_visualizations(slippage_array, slippage_by_type, slippage_by_volume):
    """
    Draw three slippage histograms side by side, save the figure and show it.

    Panels, left to right:
    1. every value in ``slippage_array``
    2. slippage of trades whose type is ``Maker_ask``
    3. slippage of trades whose type is ``Maker_bid``

    A per-type panel keeps its title, axis labels and grid but contains no
    bars when that type is absent from ``slippage_by_type`` or has no values.

    Args:
        slippage_array: all slippage values, passed straight to ``Axes.hist``.
        slippage_by_type: mapping of trade type -> list of slippage values.
        slippage_by_volume: accepted for interface compatibility; not used here.

    Side effects:
        Writes ``slippage_hist_row.png`` relative to the current working
        directory and calls ``plt.show()``.
    """
    import matplotlib.pyplot as plt
    import numpy as np

    type_labels = {
        "all": "All Slippage Distribution",
        "Maker_ask": "Type = Maker_ask (主动买单/挂卖)",
        "Maker_bid": "Type = Maker_bid (主动卖单/挂买)"
    }
    colors = {
        "all": "blue",
        "Maker_ask": "green",
        "Maker_bid": "red"
    }

    # (data key, panel title) for each subplot, left to right.
    # Only the "all" panel takes its title from type_labels.
    panels = (
        ("all", type_labels["all"]),
        ("Maker_ask", 'Ask Slippage Distribution'),
        ("Maker_bid", 'Bid Slippage Distribution'),
    )

    figure, axis_row = plt.subplots(1, 3, figsize=(15, 4))

    for axis, (key, title) in zip(axis_row, panels):
        if key == "all":
            samples = slippage_array
        elif key in slippage_by_type and len(slippage_by_type[key]) > 0:
            samples = slippage_by_type[key]
        else:
            # Nothing recorded for this trade type: leave the panel without bars
            samples = None

        if samples is not None:
            axis.hist(samples, bins=50, alpha=0.7, color=colors[key])

        axis.set_title(title)
        axis.set_xlabel('Slippage')
        axis.set_ylabel('Frequency')
        axis.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig('slippage_hist_row.png')
    plt.show()



In [None]:
# BTC 5s 对冲 正负万1 不过滤行情
file_path = "/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-18/2025-07-01-2025-07-13_1752883486.132423.json"
results = analyze_slippage_backtest(file_path) 

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-21/2025-07-01-2025-07-13_1753131533.778579.json')


In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-21/2025-07-01-2025-07-13_1753138977.148704.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-21/2025-07-01-2025-07-13_1753140838.4960299.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-21/2025-07-01-2025-07-13_1753143435.000588.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-21/2025-07-01-2025-07-13_1753145597.893425.json')

# 

In [None]:
# OIR + 过滤行情
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-21/2025-07-01-2025-07-13_1753150054.832012.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-21/2025-07-01-2025-07-13_1753150054.832012.json')

In [None]:
# 只过滤SELL, 0.0006
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-21/2025-07-01-2025-07-13_1753151673.4183512.json')

In [None]:
# if self.max_pos * self.order_size > self._position_ex0 and self.sr_bid < -0.0001 and self.sr_bid_300.percentile(
#         0.5) < -0.0001 and price_return_30s < 0.0006:
#     return 1
# if -self.max_pos * self.order_size < self._position_ex0 and self.sr_ask > 0.0001 and self.sr_ask_300.percentile(
#         0.5) > 0.0001:
#     return 2
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-21/2025-07-01-2025-07-13_1753155931.77145.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-22/2025-07-01-2025-07-13_1753216324.123404.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-22/2025-02-20-2025-02-26_1753223839.1197839.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-23/2025-06-15-2025-07-13_1753312132.4394119.json')



In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-24/2025-05-01-2025-05-31_1753398188.993484.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-24/2025-05-01-2025-05-31_1753402445.818174.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-25/2025-05-01-2025-05-31_1753477365.235345.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-25/2025-05-01-2025-05-31_1753483577.598297.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-27/2025-04-01-2025-04-30_1753661363.4618418.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-28/2025-07-01-2025-07-13_1753746252.157256.json')


In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-28/2025-07-01-2025-07-13_1753748285.254567.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-28/2025-07-01-2025-07-13_1753753382.714158.json')

In [None]:
scored_df

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-29/2025-07-01-2025-07-05_1753775101.038172.json')

### Dynamic Spread 版本回测验证过滤极端行情

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-04/2025-07-01-2025-07-13_BTC_0.0_500.0_10_True_False_True_-0.0001_5.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-01/2025-07-01-2025-07-13_BTC_0.0_500.0_10_True_False_True_0.0_5.json')

In [None]:

analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-31/2025-07-01-2025-07-13_1754005460.0496042.json')

In [None]:
# 7/1-7/13 BTC 10s 对冲 动态阈值 不过滤行情 挂单距离0
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-29/2025-07-01-2025-07-13_1753832383.2794082.json')

In [None]:
# 7/1-7/13 BTC 10s 对冲 动态阈值 过滤行情30s 0.0006 挂单距离0
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-29/2025-07-01-2025-07-13_1753834612.286644.json')

In [None]:
# 7/1-7/13 BTC 10s 对冲 动态阈值 过滤行情30s 0.0004 挂单距离0
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-29/2025-07-01-2025-07-13_1753836274.648629.json')

In [None]:
# 7/1-7/13 BTC 10s 对冲 动态阈值 过滤行情30s 0.001 挂单距离0
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-29/2025-07-01-2025-07-13_1753841273.707703.json')

In [None]:
# 7/1-7/13 BTC 10s 对冲 动态阈值 过滤行情30s 0.001 挂单距离0
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-29/2025-07-01-2025-07-13_1753844029.8325791.json')


In [None]:
# 6/1-6/30 BTC 10s 对冲 动态阈值 不过滤行情  挂单距离0
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-29/2025-06-01-2025-06-30_1753845496.175724.json')


In [None]:
# 21：37 start
# 6/1-6/30 BTC 10s 对冲 动态阈值 过滤行情 0.001  挂单距离0
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-29/2025-06-01-2025-06-30_1753850243.431805.json')


In [None]:
# 6/1-6/30 BTC 10s 对冲 动态阈值 过滤行情 0.0006  挂单距离0
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-29/2025-06-01-2025-06-30_1753853237.0063202.json')


In [None]:
# 6/1-6/30 BTC 10s 对冲 动态阈值 过滤行情 0.002  挂单距离0
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-29/2025-06-01-2025-06-30_1753857127.5948448.json')



In [None]:
# 5/1 - 5/30
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-30/2025-05-01-2025-05-30_1753907846.2815092.json')


In [None]:

# 5/1 -5/30 0.0006
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-30/2025-05-01-2025-05-30_1753911162.587743.json')

In [None]:
# 4月 benchmark
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-30/2025-04-01-2025-04-30_1753915849.388318.json')

In [None]:
# April, 0.0006
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-30/2025-04-01-2025-04-30_1753921646.696481.json')

In [None]:
# 3月 benchmark
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-30/2025-03-01-2025-03-30_1753926890.0780668.json')

In [None]:
# 3月 0.0006
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-30/2025-03-01-2025-03-30_1753931933.668311.json')


In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-01/2025-07-01-2025-07-13_BTC_0.0_500.0_10_True_False_True_0.0001_5.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-01/2025-07-01-2025-07-13_BTC_0.0_500.0_10_True_False_True_0.0_5.json')

In [None]:
scored_df = pd.read_csv('/Users/rayxu/Downloads/2025072205_scored_features_swap.csv')
scored_df = scored_df[scored_df.Symbol.isin (['BTC-USDT','ETH-USDT','XRP-USDT','SOL-USDT','DOGE-USDT','TRUMP-USDT','PENGU-USDT'])]

# 挂单距离

In [None]:
import pandas as pd
import numpy as np

# 读入数据
file_path1 = '/Users/rayxu/Downloads/order.arbitrage_eth_okx_binance_08_2 (6).csv'
file_path2 = '/Users/rayxu/Downloads/order.arbitrage_eth_okx_binance_09_2 (5).csv'

stats1, sell_stats1, buy_stats1, df1 = analyze_slippage(file_path1,starttime='2025-07-31 13:31:39')
stats2, sell_stats2, buy_stats2, df2 = analyze_slippage(file_path2,starttime='2025-07-31 13:31:39')

import pandas as pd

def make_slippage_summary_table(stats1, sell_stats1, buy_stats1, stats2, sell_stats2, buy_stats2,
                                labels=("d=0", "d=5")):
    """
    Combine the slippage statistics of two runs into one comparison table.

    Each run contributes three columns, named "<label> Sell", "<label> Buy"
    and "<label> All", in that order; the first run's columns come first.

    Args:
        stats1, sell_stats1, buy_stats1: overall / sell-only / buy-only
            statistics dicts of the first run.
        stats2, sell_stats2, buy_stats2: the same three dicts for the second run.
        labels: pair of column-name prefixes, one per run. The default
            reproduces the original hard-coded "d=0" / "d=5" columns.

    Returns:
        DataFrame with a "Metric" column followed by the six value columns,
        one row per metric. A metric missing from a dict is stored as None.

    Raises:
        ValueError: if ``labels`` does not contain exactly two entries.
    """
    first_label, second_label = labels

    # Fixed metric order shared by every column
    keys = [
        "count", "mean", "median", "std", "min", "max",
        "percentile_1", "percentile_5", "percentile_10", "percentile_25",
        "percentile_50", "percentile_55", "percentile_60", "percentile_65",
        "percentile_75", "percentile_95", "percentile_99"
    ]

    column_sources = [
        (f"{first_label} Sell", sell_stats1),
        (f"{first_label} Buy", buy_stats1),
        (f"{first_label} All", stats1),
        (f"{second_label} Sell", sell_stats2),
        (f"{second_label} Buy", buy_stats2),
        (f"{second_label} All", stats2),
    ]

    data = {"Metric": keys}
    for column_name, source in column_sources:
        data[column_name] = [source.get(k, None) for k in keys]

    return pd.DataFrame(data)
df = make_slippage_summary_table(stats1, sell_stats1, buy_stats1, stats2, sell_stats2, buy_stats2)
df
# # 确保 Createtime 是 datetime 类型（如果是字符串格式）





In [None]:
# df1['Createtime'] = pd.to_datetime(df1['Createtime'])
# df2['Createtime'] = pd.to_datetime(df2['Createtime'])

# # 排序
# df1 = df1.sort_values('Createtime').reset_index(drop=True)
# df2 = df2.sort_values('Createtime').reset_index(drop=True)

# # 把 df1 的时间戳转为 numpy array（加快查找）
# df1_times = df1['Createtime'].values.astype('datetime64[ms]')

# # 设置时间阈值：100ms
# threshold = np.timedelta64(1000, 'ms')

# # 创建一个掩码数组：True 表示该行在 df2 中找不到接近时间，应该保留
# keep_mask = []

# for t2 in df2['Createtime'].values.astype('datetime64[ms]'):
#     # 查找 df1 中是否有时间在 t2 ± 100ms 内
#     time_diffs = np.abs(df1_times - t2)
#     if np.any(time_diffs <= threshold):
#         keep_mask.append(False)
#     else:
#         keep_mask.append(True)

# # 筛选出 df2 中“独有”的行
# df2_unique = df2[np.array(keep_mask)].reset_index(drop=True)

# # 查看结果
# print(f"原始 df2 成交数: {len(df2)}")
# print(f"匹配后剩余独有成交数: {len(df2_unique)}")
from tqdm import tqdm

import pandas as pd
import numpy as np
from tqdm import tqdm

def find_extra_times(df1, df2, time_col='Createtime', tolerance_ms=100):
    """
    Return the rows of ``df1`` that have no row in ``df2`` close to them in time.

    A ``df1`` row counts as matched when some ``df2`` timestamp lies within
    ``tolerance_ms`` milliseconds of it (inclusive on both sides).

    Args:
        df1: frame whose unmatched rows are wanted.
        df2: frame searched for nearby timestamps.
        time_col: name of the timestamp column, present in both frames;
            values are parsed with ``pd.to_datetime``.
        tolerance_ms: maximum absolute time difference, in milliseconds.

    Returns:
        A copy of the unmatched ``df1`` rows, in ascending time order, with
        their original index labels.
    """
    # Reset the index BEFORE sorting so that the sorted series' index holds
    # the positional row number of each timestamp inside df1. Selecting by
    # position afterwards is correct whatever df1's own index or order is.
    ts1 = pd.to_datetime(df1[time_col]).reset_index(drop=True).sort_values(kind='stable')
    ts2 = pd.to_datetime(df2[time_col]).sort_values(kind='stable').reset_index(drop=True)

    tolerance = pd.Timedelta(milliseconds=tolerance_ms)
    n2 = len(ts2)
    extra_positions = []

    j = 0
    for position, t1 in ts1.items():
        # Two-pointer sweep: advance j to the first ts2 entry >= t1 - tolerance.
        # j never moves backwards because ts1 is visited in ascending order.
        while j < n2 and ts2.iloc[j] < t1 - tolerance:
            j += 1
        # ts2[j] is the earliest candidate; if it is already past t1 + tolerance
        # then no later entry can match either.
        if j < n2 and abs(ts2.iloc[j] - t1) <= tolerance:
            continue
        extra_positions.append(position)

    return df1.iloc[extra_positions].copy()

df1_extra_orders = find_extra_times(df1, df2, time_col='Timestamp', tolerance_ms=7000)
df1_extra_orders


In [None]:
# 找出 df2 独有成交（即 df1 中没有时间靠近的）
df2_extra_orders = find_extra_times(df2, df1, time_col='Createtime', tolerance_ms=7000)
df2_extra_orders['slippage'].mean()

In [None]:
def find_common_time_matched(df1, df2, time_col='Createtime', tolerance_ms=1000):
    df1 = df1.copy()
    df2 = df2.copy()
    df1[time_col] = pd.to_datetime(df1[time_col])
    df2[time_col] = pd.to_datetime(df2[time_col])

    df1 = df1.sort_values(time_col).reset_index(drop=True)
    df2 = df2.sort_values(time_col).reset_index(drop=True)

    matches_df1 = []
    matches_df2 = []

    used_df2 = set()

    for i, row1 in df1.iterrows():
        t1 = np.datetime64(row1[time_col], 'ms')
        df2_candidates = df2[~df2.index.isin(used_df2)]
        diffs = np.abs(df2_candidates[time_col].values.astype('datetime64[ms]') - t1)
        diffs_ms = diffs.astype('timedelta64[ms]').astype(int)

        if len(diffs_ms) == 0:
            continue

        j = diffs_ms.argmin()
        if diffs_ms[j] <= tolerance_ms:
            row2 = df2_candidates.iloc[j]
            matches_df1.append(row1)
            matches_df2.append(row2)
            used_df2.add(df2_candidates.index[j])

    df1_filtered = pd.DataFrame(matches_df1).reset_index(drop=True)
    df2_filtered = pd.DataFrame(matches_df2).reset_index(drop=True)

    return df1_filtered, df2_filtered
df1_filtered, df2_filtered = find_common_time_matched(df1, df2, tolerance_ms=1000)



In [None]:
print(len(df1_filtered))
print(len(df2_filtered))
print(df1_filtered['slippage'].mean())
print(df2_filtered['slippage'].mean())

df1_filtered['slippage'].mean()
df2_filtered['slippage'].mean()

In [None]:
def compute_position(df, time_col='Createtime', side_col='Side', amount_col='Amount'):
    """
    Build the running signed position from a table of fills.

    Works on a copy of ``df``: the time column is parsed with
    ``pd.to_datetime``, rows are sorted by it, each amount is signed by its
    side ('buy' -> +1, 'sell' -> -1) and the signed amounts are accumulated.
    A side value other than 'buy' / 'sell' maps to NaN.

    Returns:
        DataFrame with the columns ``time_col`` and 'position', in time order.
    """
    side_sign = {'buy': 1, 'sell': -1}

    fills = df.copy()
    fills[time_col] = pd.to_datetime(fills[time_col])
    fills = fills.sort_values(time_col)

    fills['signed_amount'] = fills[side_col].map(side_sign) * fills[amount_col]
    fills['position'] = fills['signed_amount'].cumsum()

    return fills.loc[:, [time_col, 'position']]
df1_pos = compute_position(df1, side_col='Side', amount_col='Order2FilledAmount')
df2_pos = compute_position(df2, side_col='Side', amount_col='Order2FilledAmount')
import plotly.graph_objects as go

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=df1_pos['Createtime'],
    y=df1_pos['position'],
    mode='lines',
    name='df1 Position',
    line=dict(color='blue')
))

fig.add_trace(go.Scatter(
    x=df2_pos['Createtime'],
    y=df2_pos['position'],
    mode='lines',
    name='df2 Position',
    line=dict(color='red')
))

fig.update_layout(
    title="Position Over Time",
    xaxis_title="Time",
    yaxis_title="Cumulative Position",
    legend=dict(x=0, y=1),
    hovermode="x unified"
)

fig.show()


In [None]:
df2_filtered

In [None]:
import numpy as np
import pandas as pd

def check_if_really_unmatched(df1_extra_orders, df2, time_col='Createtime', tolerance_ms=100):
    """
    检查 df1_extra_orders 中的时间是否真的无法在 df2 中找到近似匹配。

    返回一个 DataFrame，添加 matched_in_df2 和 min_time_diff_ms 两列
    """
    df2_times = pd.to_datetime(df2[time_col]).sort_values().to_numpy(dtype='datetime64[ms]')
    check_results = []
    min_diffs = []

    for t1 in pd.to_datetime(df1_extra_orders[time_col]):
        t1_np = np.datetime64(t1, 'ms')
        diffs = np.abs(df2_times - t1_np).astype('timedelta64[ms]').astype(int)
        min_diff = diffs.min() if len(diffs) > 0 else np.inf
        check_results.append(min_diff <= tolerance_ms)
        min_diffs.append(min_diff)

    result = df1_extra_orders.copy()
    result['matched_in_df2'] = check_results
    result['min_time_diff_ms'] = min_diffs
    return result

# Check which rows of df1_extra_orders can in fact be matched in df2 at the tolerance passed below (7000 ms)
check_df = check_if_really_unmatched(df1_extra_orders, df2, tolerance_ms=7000)

# 统计一下
print(check_df['matched_in_df2'].value_counts())
print(f"其实可以匹配上的数量: {(check_df['matched_in_df2']).sum()} / {len(check_df)}")


In [None]:
df1_extra_orders['slippage'].mean()

In [None]:
df1_extra_orders

In [None]:
df1.columns

In [None]:
df1

In [None]:
df2

In [None]:
extra_df1

In [None]:
processed_df1

In [None]:
-0.0001-processed_df1[processed_df1.Side=='buy']['SR'].mean()

In [None]:
file_path = '/Users/rayxu/Downloads/order.arbitrage_eth_okx_binance_08_2 (5).csv'
stats1, sell_stats1, buy_stats1, processed_df1 = analyze_slippage(file_path,starttime='2025-08-05 05:30:00')
file_path = '/Users/rayxu/Downloads/order.arbitrage_eth_okx_binance_09_2 (4).csv'
stats2, sell_stats2, buy_stats2, processed_df2 = analyze_slippage(file_path,starttime='2025-08-05 05:30:00')

In [None]:
print(-0.0001-processed_df1[processed_df1.Side=='buy']['SR'].mean())
print(-0.0001-processed_df2[processed_df2.Side=='buy']['SR'].mean())


print(processed_df1[processed_df1.Side=='sell']['SR'].mean()-0.0001)
print(processed_df2[processed_df2.Side=='sell']['SR'].mean()-0.0001)

In [None]:
processed_df1[processed_df1.Side=='sell']['SR'].mean()

In [None]:
file_path = '/Users/rayxu/Downloads/order.arbitrage_eth_okx_binance_08_2 (5).csv'
stats1, sell_stats1, buy_stats1, processed_df1 = analyze_slippage(file_path,starttime='2025-07-31 13:31:39')
print("\nOverall slippage statistics:")
print(stats1)

In [None]:
file_path = '/Users/rayxu/Downloads/order.arbitrage_eth_okx_binance_09_2 (4).csv'
stats2, sell_stats2, buy_stats2, processed_df2 = analyze_slippage(file_path,starttime='2025-07-31 13:31:39')
print("\nOverall slippage statistics:")
print(stats2)

In [None]:
df1[1000:1200]
print(len(processed_df1[processed_df1.Side == 'sell']))
print(len(processed_df2[processed_df2.Side == 'sell']))
print(len(processed_df1[processed_df1.Side == 'buy']))
print(len(processed_df2[processed_df2.Side == 'buy']))

In [None]:
df2[10:338]['slippage'].mean()

In [None]:
import plotly.graph_objects as go
import pandas as pd

# Keep only the sell orders of each run.
# sell_df2 must come from df2: it was previously built from df1, which made
# the "df2 sell count" trace an exact copy of the df1 trace.
sell_df1 = df1[df1.Side == 'sell'].copy()
sell_df2 = df2[df2.Side == 'sell'].copy()

# Parse the order creation time
sell_df1['Createtime'] = pd.to_datetime(sell_df1['Createtime'])
sell_df2['Createtime'] = pd.to_datetime(sell_df2['Createtime'])

# Number of sell orders per one-second bucket
sell_df1_count = sell_df1.resample('1s', on='Createtime').size().reset_index(name='count')
sell_df2_count = sell_df2.resample('1s', on='Createtime').size().reset_index(name='count')

fig = go.Figure()

# df1 trace
fig.add_trace(go.Scatter(
    x=sell_df1_count['Createtime'],
    y=sell_df1_count['count'],
    mode='lines',
    line=dict(color='blue', width=2),
    name='df1 sell count'
))

# df2 trace
fig.add_trace(go.Scatter(
    x=sell_df2_count['Createtime'],
    y=sell_df2_count['count'],
    mode='lines',
    line=dict(color='red', width=2, dash='dot'),
    name='df2 sell count'
))

fig.update_layout(
    height=700,
    width=1800,
    yaxis_title='Number of Sell Orders',
    xaxis_title='Createtime',
    title='Sell Order Time Distribution (Line Chart)',
    legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01)
)

fig.show()


In [None]:
processed_df1['Createtime'] = pd.to_datetime(processed_df1['Createtime'])   
processed_df1[(processed_df1.Createtime>pd.to_datetime('2025-08-03 09:51:10')) & (processed_df1.Createtime<pd.to_datetime('2025-08-03 11:10:15'))]['slippage'].mean()

In [None]:
processed_df1

In [None]:
import plotly.graph_objects as go

# 取出两个df中Side为'sell'的行
sell_df1 = processed_df1[processed_df1.Side == 'sell'].copy()
sell_df2 = processed_df2[processed_df2.Side == 'sell'].copy()

# 转换Createtime为pandas的datetime格式
sell_df1['Createtime'] = pd.to_datetime(sell_df1['Createtime'])
sell_df2['Createtime'] = pd.to_datetime(sell_df2['Createtime'])

fig = go.Figure()

# 在同一个坐标轴上画出两个数据集的卖单时间分布
fig.add_trace(go.Scatter(
    x=sell_df1['Createtime'],
    y=[1]*len(sell_df1),
    mode='markers',
    marker=dict(color='blue', size=10, symbol='circle'),
    name='df1 sell Createtime'
))

fig.add_trace(go.Scatter(
    x=sell_df2['Createtime'],
    y=[1]*len(sell_df2),
    mode='markers',
    marker=dict(color='red', size=10, symbol='x'),
    name='df2 sell Createtime'
))

fig.update_layout(
    height=700,
    width=1800,
    yaxis=dict(
        tickvals=[1],
        ticktext=['Sell Orders'],
        title='Sell Orders'
    ),
    xaxis_title='Createtime',
    title='Sell Order Createtime for Two DataFrames (Same Axis)',
    legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01)
)

fig.show()

In [None]:
processed_df1['Order2FilledAmount'].max()

In [None]:
processed_df1.drop_duplicates(subset=['OrderID'])['Amount'].sum()/10

In [None]:
processed_df2.drop_duplicates(subset=['OrderID'])['Amount'].sum()/10

In [None]:
processed_df1['AmountFilled'].value_counts()

In [None]:
processed_df1[processed_df1.OrderID == 2724685136499286016.00000000]

In [None]:
processed_df1[processed_df1.Order2FilledAmount != 0.04]

In [None]:
processed_df1['Amount'].sum()/10

In [None]:
processed_df2['Amount'].sum()/10

In [None]:
processed_df[processed_df.Side == 'buy']['slippage'].describe()

In [None]:
processed_df[processed_df.Side == 'buy'].sort_values('slippage')

In [None]:
show_columns = ['Symbol', 'Date', 'tick_size_factor', 'q_lower', 'q_upper',
    'q_range',
       'ZCross_ratio', 'Mean_Crossings_ratio', 'Kurtosis', 'Half_Life', 'std',
       'bpv', 'jump_share', 'upper_long_event_count',
       'lower_long_event_count', 'event_switch_avg_gap_ms',
       'upper_avg_duration', 'lower_avg_duration','total_opps', 'Exchange1',
       'Exchange2', 'ex0_24h_usdt',
       'ex1_24h_usdt', 'total_24h_usdt', 'rank_by_amount']

In [None]:
scored_df[show_columns]

# 晚点对冲

In [None]:
# 晚点对冲的benchmark

analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-31/2025-07-01-2025-07-13_1753995426.373256.json')

In [None]:
# 晚对冲 + 过滤极端行情 0.0006
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-30/2025-07-01-2025-07-13_1753936668.396888.json')

In [None]:
# 筛选大于0
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-30/2025-07-01-2025-07-13_1753939636.832428.json')


# 对冲时间

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-07/2025-08-03-2025-08-06_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime.json')

In [None]:
# 5s vs 60s vs 60s过滤 vs 5s过滤
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-06/2025-08-03-2025-08-06_BTC_0.0_500.0_5_True_False_False_0.001_30_test_hedging_time_August.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-06/2025-08-03-2025-08-06_BTC_0.0_500.0_60_True_False_False_0.001_30_test_hedging_time_August.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-06/2025-08-03-2025-08-06_BTC_0.0_500.0_60_True_False_True_0.0_3_test_hedging_time_August_filter_price.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-06/2025-08-03-2025-08-06_BTC_0.0_500.0_5_True_False_True_0.0_3_test_hedging_time_August_filter_price.json')

In [None]:
# July: compare slippage statistics across hedge delays
import pandas as pd

# Hedge delays to compare, and the per-delay statistics collected so far
delays = [5, 10, 15, 20, 25, 30, 60]
results = []

for delay in delays:
    file_path = f'/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-0{1 if delay == 5 else 2}/2025-07-01-2025-07-13_BTC_0.0_500.0_{delay}_True_False_False_0.0_30.json'
    if delay == 10:
        # The 10 s run uses a differently named log file
        file_path = f'/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-31/2025-07-01-2025-07-13_1754005460.0496042.json'
    result = analyze_slippage_backtest(file_path)
    # Accept a Series-like return value as well as a plain dict
    if hasattr(result, 'to_dict'):
        result = result.to_dict()
    if result is None:
        # analyze_slippage_backtest returns None when the file is missing or
        # holds no valid slippage values; keep a placeholder row instead of
        # failing on result['delay'].
        results.append({'delay': delay})
        continue
    result['delay'] = delay
    results.append(result)

# One row per delay
df = pd.DataFrame(results)
# Put the delay column first
cols = ['delay'] + [col for col in df.columns if col != 'delay']
df = df[cols]
display(df)

In [None]:
# June: compare slippage statistics across hedge delays (tolerates missing results)
import pandas as pd

# Hedge delays to compare, and the per-delay statistics collected so far
delays = [5, 10, 15, 20, 25, 30, 60]
results = []

for delay in delays:
    # The 5 s and 10 s logs sit in the 25-08-01 folder, all others in 25-08-02
    log_day = '25-08-01' if delay in (5, 10) else '25-08-02'
    file_path = f'/Users/rayxu/Downloads/nuts_am/log/BTC/{log_day}/2025-06-01-2025-06-30_BTC_0.0_500.0_{delay}_True_False_False_0.0_30.json'

    result = analyze_slippage_backtest(file_path)
    if hasattr(result, 'to_dict'):
        result = result.to_dict()
    if result is None:
        # No statistics came back: keep a row that only carries the delay
        result = {}
    result['delay'] = delay
    results.append(result)

# One row per delay, with the delay column first
df = pd.DataFrame(results)
if df.empty:
    print("无数据")
else:
    cols = ['delay'] + [col for col in df.columns if col != 'delay']
    df = df[cols]
    display(df)

In [None]:
# 5月

analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-01/2025-05-01-2025-05-31_BTC_0.0_500.0_5_True_False_False_0.0_30.json')
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-01/2025-05-01-2025-05-31_BTC_0.0_500.0_10_True_False_False_0.0_30.json')
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-02/2025-05-01-2025-05-31_BTC_0.0_500.0_15_True_False_False_0.0_30.json')
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-02/2025-05-01-2025-05-31_BTC_0.0_500.0_20_True_False_False_0.0_30.json')
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-02/2025-05-01-2025-05-31_BTC_0.0_500.0_25_True_False_False_0.0_30.json')
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-02/2025-05-01-2025-05-31_BTC_0.0_500.0_30_True_False_False_0.0_30.json')
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-02/2025-05-01-2025-05-31_BTC_0.0_500.0_60_True_False_False_0.0_30.json')

# 过滤时间

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-03/2025-07-01-2025-07-18_BTC_0.0_500.0_10_True_False_True_0.0_3.json')
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-03/2025-07-01-2025-07-18_BTC_0.0_500.0_10_True_False_True_0.0_5.json')
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-03/2025-07-01-2025-07-18_BTC_0.0_500.0_10_True_False_True_0.0_10.json')
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-03/2025-07-01-2025-07-18_BTC_0.0_500.0_10_True_False_True_0.0_20.json')
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-03/2025-07-01-2025-07-18_BTC_0.0_500.0_10_True_False_True_0.0_30.json')
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-03/2025-07-01-2025-07-18_BTC_0.0_500.0_10_True_False_True_0.0_60.json')
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-03/2025-07-01-2025-07-18_BTC_0.0_500.0_10_True_False_True_0.0_120.json')



# July: compare slippage statistics across filter periods
import pandas as pd

# Filter periods to compare (the last number in the log file name),
# and the per-period statistics collected so far
delays = [3, 5, 10, 20, 30, 60, 120]
results = []

for delay in delays:
    file_path = f'/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-03/2025-07-01-2025-07-18_BTC_0.0_500.0_10_True_False_True_0.0_{delay}.json'
    result = analyze_slippage_backtest(file_path)
    # Accept a Series-like return value as well as a plain dict
    if hasattr(result, 'to_dict'):
        result = result.to_dict()
    if result is None:
        # analyze_slippage_backtest returns None when the file is missing or
        # holds no valid slippage values; keep a placeholder row instead of
        # failing on result['FilterPeriod'].
        results.append({'FilterPeriod': delay})
        continue
    result['FilterPeriod'] = delay
    results.append(result)

# One row per filter period
df = pd.DataFrame(results)
# Put the FilterPeriod column first
cols = ['FilterPeriod'] + [col for col in df.columns if col != 'FilterPeriod']
df = df[cols]
display(df)

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-01/2025-07-01-2025-07-13_BTC_0.0_500.0_10_True_False_True_0.0_5.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-01/2025-07-01-2025-07-13_BTC_0.0_500.0_10_True_False_True_0.0001_5.json')


# Advanced

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-05/2025-07-01-2025-07-13_BTC_0.0_500.0_10_True_False_True_0.001_30_trend_by_regression.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-05/2025-07-01-2025-07-13_BTC_0.0_500.0_10_True_False_True_0.001_30_trend_by_regression.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-04/2025-07-01-2025-07-13_BTC_0.0_500.0_10_True_False_True_0.0006_30.json')

analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-04/2025-07-01-2025-07-13_BTC_0.0_500.0_10_True_False_True_0.001_30.json')

analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-04/2025-07-01-2025-07-13_BTC_0.0_500.0_10_True_False_True_0.001_30.json')

# 对冲时间实盘结果

In [None]:
file_path = '/Users/rayxu/Downloads/order.arbitrage_btc_okx_binance_08_2 (4).csv'
stats1, sell_stats1, buy_stats1, processed_df1 = analyze_slippage(file_path, starttime='2025-08-06 13:49:00')


In [None]:
file_path = '/Users/rayxu/Downloads/order.arbitrage_btc_okx_binance_09_2 (4).csv'
stats2, sell_stats2, buy_stats2, processed_df2 = analyze_slippage(file_path, starttime='2025-08-06 13:49:00')

In [None]:
sell_stats 

In [None]:
import pandas as pd

def make_slippage_summary_table(stats1, sell_stats1, buy_stats1, stats2, sell_stats2, buy_stats2,
                                labels=("5s", "60s")):
    """
    Combine the slippage statistics of two runs into one comparison table.

    Each run contributes three columns, named "<label> Sell", "<label> Buy"
    and "<label> All", in that order; the first run's columns come first.

    Args:
        stats1, sell_stats1, buy_stats1: overall / sell-only / buy-only
            statistics dicts of the first run.
        stats2, sell_stats2, buy_stats2: the same three dicts for the second run.
        labels: pair of column-name prefixes, one per run. The default
            reproduces the original hard-coded "5s" / "60s" columns.

    Returns:
        DataFrame with a "Metric" column followed by the six value columns,
        one row per metric. A metric missing from a dict is stored as None.

    Raises:
        ValueError: if ``labels`` does not contain exactly two entries.
    """
    first_label, second_label = labels

    # Fixed metric order shared by every column
    keys = [
        "count", "mean", "median", "std", "min", "max",
        "percentile_1", "percentile_5", "percentile_10", "percentile_25",
        "percentile_50", "percentile_55", "percentile_60", "percentile_65",
        "percentile_75", "percentile_95", "percentile_99"
    ]

    column_sources = [
        (f"{first_label} Sell", sell_stats1),
        (f"{first_label} Buy", buy_stats1),
        (f"{first_label} All", stats1),
        (f"{second_label} Sell", sell_stats2),
        (f"{second_label} Buy", buy_stats2),
        (f"{second_label} All", stats2),
    ]

    data = {"Metric": keys}
    for column_name, source in column_sources:
        data[column_name] = [source.get(k, None) for k in keys]

    return pd.DataFrame(data)
df = make_slippage_summary_table(stats1, sell_stats1, buy_stats1, stats2, sell_stats2, buy_stats2)
df

In [None]:
processed_df2[(processed_df2['Side']=='sell') & (processed_df2['slippage']<-0.0005)].head(40)

In [None]:
(processed_df2['slippage'].sum()+0.008)/862

## RollingStd

In [None]:
# Summarise the distribution of the hedge wait time (column 'wait2Lock_interval')
import json
import pandas as pd

# The log is parsed one line at a time rather than with a single json.load
# (presumably because the whole file raised "JSONDecodeError: Extra data" — confirm)
file_path = '/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-08-03-2025-08-06_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type2_每天更新一次vol_long.json'
with open(file_path, 'r') as f:
    lines = f.readlines()

# Treat every non-empty line as its own JSON object; report and skip lines that fail to parse
data = []
for line in lines:
    line = line.strip()
    if line:
        try:
            data.append(json.loads(line))
        except json.JSONDecodeError as e:
            print(f"JSON解析失败: {e}，内容: {line[:100]}...")

df = pd.DataFrame(data)
# Print summary statistics and value frequencies when the column is present
if 'wait2Lock_interval' in df.columns:
    print(df['wait2Lock_interval'].describe())
    print(df['wait2Lock_interval'].value_counts())
else:
    print("列 'wait2Lock_interval' 不存在于 DataFrame 中")


In [None]:
# 第二种方法，每天更新一次vol_long, 1个小时
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-08-03-2025-08-06_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_720301440_shortterm_Type2_每天更新一次vol_long.json')

In [None]:
# 第二种方法。每天更新一次vol_long， 50s的vol5s
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-08-03-2025-08-06_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_10301440_shortterm_Type2_每天更新一次vol_long.json')

In [None]:
# 第二种方法。每天更新一次vol_long， 5分钟的vol5s
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-08-03-2025-08-06_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_60301440_shortterm_Type2_每天更新一次vol_long.json')

In [None]:
# 第二种方法。mean，每天更新一次vol_long
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-08-03-2025-08-06_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type2_每天更新一次vol_long.json')

In [None]:
# 第二种方法 
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-08-03-2025-08-06_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type2.json')

In [None]:
# 第一种方法 30分钟 1.1
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-08-03-2025-08-06_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_36030_shortterm_coeff1.1.json')

In [None]:
# 第一种方法 30分钟
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-08-03-2025-08-06_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_36030_shortterm.json')

In [None]:
# 
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-08-03-2025-08-06_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTimeTwoType.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-06/2025-08-03-2025-08-06_BTC_0.0_500.0_60_True_False_False_0.0_3_rolling_std_ratio.json')


In [None]:
import json
import pandas as pd
import numpy as np
# 读取文件
with open("/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/price_and_vol_ratio", "r") as f:
    data = json.load(f)

# 转成 DataFrame
df = pd.DataFrame(data)

# 把 time 转成 pandas 时间类型
df['time'] = pd.to_datetime(df['time'])

df['vol_ratio_normalized'] = df['vol_ratio'] / np.sqrt(12)
df['vol_ratio_normalized'].describe()


In [None]:
df['vol_ratio_normalized'].describe()

In [None]:
import plotly.graph_objects as go

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=df['time'],
    y=df['vol_ratio_normalized'],
    mode='lines',
    name='vol_ratio_normalized'
))
fig.update_layout(
    title='vol_ratio_normalized over time',
    xaxis_title='Time',
    yaxis_title='vol_ratio_normalized',
    width=2000,   # 放大宽度
    height=800    # 放大高度
)
fig.show()

In [None]:
# NOTE(review): Series.median() returns a single scalar, which has no .plot()
# method, so this line raises AttributeError as written. Presumably a rolling
# (or otherwise grouped) median was meant to be plotted — TODO confirm and fix.
df['vol_ratio_normalized'].median().plot()

In [None]:
df['expected_vol_5s'] = df['vol_5s']*np.sqrt(12)
df['indicator'] = df['expected_vol_5s'] - df['vol_long']

In [None]:
df['indicator'].plot()

In [None]:
df[df.time > pd.Timestamp('2025-08-06 00:00:00')].head(40)

In [None]:
pd.read

In [None]:
def compute_position(df, time_col='Createtime', side_col='Side', amount_col='Amount'):
    """
    Build the running (cumulative) position from a table of fills.

    Args:
        df: Input DataFrame; it is copied, never modified in place.
        time_col: Column holding the fill time (parsed with ``pd.to_datetime``).
        side_col: Column holding the side; ``'buy'`` counts as +1 and
            ``'sell'`` as -1. Any other value maps to NaN.
        amount_col: Column holding the filled amount.

    Returns:
        DataFrame with two columns, ``time_col`` and ``'position'``, ordered
        by time.
    """
    direction_of = {'buy': 1, 'sell': -1}

    ordered = df.copy()
    ordered[time_col] = pd.to_datetime(ordered[time_col])
    ordered = ordered.sort_values(time_col)

    # Signed size of every fill, then its running total in time order
    side_sign = ordered[side_col].map(direction_of)
    ordered['signed_amount'] = ordered[amount_col] * side_sign
    ordered['position'] = ordered['signed_amount'].cumsum()

    wanted_columns = [time_col, 'position']
    return ordered[wanted_columns]
df1_pos = compute_position(df1, side_col='Side', amount_col='Order2FilledAmount')
import plotly.graph_objects as go

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=df1_pos['Createtime'],
    y=df1_pos['position'],
    mode='lines',
    name='df1 Position',
    line=dict(color='blue')
))

fig.update_layout(
    title="Position Over Time",
    xaxis_title="Time",
    yaxis_title="Cumulative Position",
    legend=dict(x=0, y=1),
    hovermode="x unified"
)

fig.show()


In [None]:
df1

In [None]:
analyze_slippage(file_path1, starttime='2025-08-07 16:28:00', endtime='2025-08-07 18:30:00')

In [None]:
analyze_slippage(file_path1,starttime='2025-08-08 03:43:00', endtime='2025-08-08 07:30:00')

In [None]:
file_path1 = '/Users/rayxu/Downloads/order.arbitrage_eth_okx_binance_08_2 (7).csv'

stats1, sell_stats1, buy_stats1, df1 = analyze_slippage(file_path1)

In [None]:
df1

In [None]:
import numpy as np
import pandas as pd

def check_if_really_unmatched(df1_extra_orders, df2, time_col='Createtime', tolerance_ms=100):
    """
    Check whether each timestamp in ``df1_extra_orders`` has a near match in ``df2``.

    Timestamps on both sides are truncated to millisecond resolution. For every
    row of ``df1_extra_orders`` the distance to the closest ``df2`` timestamp is
    found with a binary search over the sorted ``df2`` times.

    Args:
        df1_extra_orders: DataFrame whose rows are to be checked (not modified).
        df2: DataFrame providing the candidate timestamps.
        time_col: Name of the timestamp column present in both frames.
        tolerance_ms: Largest distance, in milliseconds, still counted as a match.

    Returns:
        A copy of ``df1_extra_orders`` with two extra columns:
        ``matched_in_df2`` (bool) and ``min_time_diff_ms`` (distance in ms to
        the closest ``df2`` timestamp; ``inf`` when ``df2`` has no usable
        timestamps, NaN for rows whose own timestamp is missing).
    """
    # Missing df2 timestamps are dropped: a NaT would otherwise turn into a
    # huge negative "difference" and make every row look matched.
    df2_times = np.sort(
        pd.to_datetime(df2[time_col]).dropna().to_numpy(dtype='datetime64[ms]')
    )
    t1 = pd.to_datetime(df1_extra_orders[time_col]).to_numpy(dtype='datetime64[ms]')

    result = df1_extra_orders.copy()
    if df2_times.size == 0:
        # Nothing to compare against: nothing can match.
        result['matched_in_df2'] = False
        result['min_time_diff_ms'] = np.inf
        return result

    # The closest df2 time is either the one just before or just after the
    # insertion point of each df1 time in the sorted df2 array.
    last = df2_times.size - 1
    pos = np.searchsorted(df2_times, t1)
    prev_times = df2_times[np.clip(pos - 1, 0, last)]
    next_times = df2_times[np.clip(pos, 0, last)]

    # Explicit int64 keeps the millisecond counts from overflowing on
    # platforms where the default integer is 32 bits wide.
    gap_prev = np.abs(t1 - prev_times).astype('timedelta64[ms]').astype(np.int64)
    gap_next = np.abs(next_times - t1).astype('timedelta64[ms]').astype(np.int64)
    min_diffs = np.minimum(gap_prev, gap_next)
    matched = min_diffs <= tolerance_ms

    # Rows without a valid timestamp can never match; report NaN for them.
    missing = np.isnat(t1)
    if missing.any():
        matched &= ~missing
        min_diffs = np.where(missing, np.nan, min_diffs)

    result['matched_in_df2'] = matched
    result['min_time_diff_ms'] = min_diffs
    return result

# With tolerance=7000 ms (7 s), check which rows of df1_extra_orders can actually be matched in df2
check_df = check_if_really_unmatched(df1_extra_orders, df2, tolerance_ms=7000)

# 统计一下
print(check_df['matched_in_df2'].value_counts())
print(f"其实可以匹配上的数量: {(check_df['matched_in_df2']).sum()} / {len(check_df)}")


In [None]:
import numpy as np
import pandas as pd

def check_if_really_unmatched(df1_extra_orders, df2, time_col='Createtime', tolerance_ms=100):
    """
    Check whether each timestamp in ``df1_extra_orders`` has a near match in ``df2``.

    Timestamps on both sides are truncated to millisecond resolution. For every
    row of ``df1_extra_orders`` the distance to the closest ``df2`` timestamp is
    found with a binary search over the sorted ``df2`` times.

    Args:
        df1_extra_orders: DataFrame whose rows are to be checked (not modified).
        df2: DataFrame providing the candidate timestamps.
        time_col: Name of the timestamp column present in both frames.
        tolerance_ms: Largest distance, in milliseconds, still counted as a match.

    Returns:
        A copy of ``df1_extra_orders`` with two extra columns:
        ``matched_in_df2`` (bool) and ``min_time_diff_ms`` (distance in ms to
        the closest ``df2`` timestamp; ``inf`` when ``df2`` has no usable
        timestamps, NaN for rows whose own timestamp is missing).
    """
    # Missing df2 timestamps are dropped: a NaT would otherwise turn into a
    # huge negative "difference" and make every row look matched.
    df2_times = np.sort(
        pd.to_datetime(df2[time_col]).dropna().to_numpy(dtype='datetime64[ms]')
    )
    t1 = pd.to_datetime(df1_extra_orders[time_col]).to_numpy(dtype='datetime64[ms]')

    result = df1_extra_orders.copy()
    if df2_times.size == 0:
        # Nothing to compare against: nothing can match.
        result['matched_in_df2'] = False
        result['min_time_diff_ms'] = np.inf
        return result

    # The closest df2 time is either the one just before or just after the
    # insertion point of each df1 time in the sorted df2 array.
    last = df2_times.size - 1
    pos = np.searchsorted(df2_times, t1)
    prev_times = df2_times[np.clip(pos - 1, 0, last)]
    next_times = df2_times[np.clip(pos, 0, last)]

    # Explicit int64 keeps the millisecond counts from overflowing on
    # platforms where the default integer is 32 bits wide.
    gap_prev = np.abs(t1 - prev_times).astype('timedelta64[ms]').astype(np.int64)
    gap_next = np.abs(next_times - t1).astype('timedelta64[ms]').astype(np.int64)
    min_diffs = np.minimum(gap_prev, gap_next)
    matched = min_diffs <= tolerance_ms

    # Rows without a valid timestamp can never match; report NaN for them.
    missing = np.isnat(t1)
    if missing.any():
        matched &= ~missing
        min_diffs = np.where(missing, np.nan, min_diffs)

    result['matched_in_df2'] = matched
    result['min_time_diff_ms'] = min_diffs
    return result

# With tolerance=7000 ms (7 s), check which rows of df1_extra_orders can actually be matched in df2
check_df = check_if_really_unmatched(df1_extra_orders, df2, tolerance_ms=7000)

# 统计一下
print(check_df['matched_in_df2'].value_counts())
print(f"其实可以匹配上的数量: {(check_df['matched_in_df2']).sum()} / {len(check_df)}")


In [None]:
import numpy as np
import pandas as pd

def check_if_really_unmatched(df1_extra_orders, df2, time_col='Createtime', tolerance_ms=100):
    """
    Check whether each timestamp in ``df1_extra_orders`` has a near match in ``df2``.

    Timestamps on both sides are truncated to millisecond resolution. For every
    row of ``df1_extra_orders`` the distance to the closest ``df2`` timestamp is
    found with a binary search over the sorted ``df2`` times.

    Args:
        df1_extra_orders: DataFrame whose rows are to be checked (not modified).
        df2: DataFrame providing the candidate timestamps.
        time_col: Name of the timestamp column present in both frames.
        tolerance_ms: Largest distance, in milliseconds, still counted as a match.

    Returns:
        A copy of ``df1_extra_orders`` with two extra columns:
        ``matched_in_df2`` (bool) and ``min_time_diff_ms`` (distance in ms to
        the closest ``df2`` timestamp; ``inf`` when ``df2`` has no usable
        timestamps, NaN for rows whose own timestamp is missing).
    """
    # Missing df2 timestamps are dropped: a NaT would otherwise turn into a
    # huge negative "difference" and make every row look matched.
    df2_times = np.sort(
        pd.to_datetime(df2[time_col]).dropna().to_numpy(dtype='datetime64[ms]')
    )
    t1 = pd.to_datetime(df1_extra_orders[time_col]).to_numpy(dtype='datetime64[ms]')

    result = df1_extra_orders.copy()
    if df2_times.size == 0:
        # Nothing to compare against: nothing can match.
        result['matched_in_df2'] = False
        result['min_time_diff_ms'] = np.inf
        return result

    # The closest df2 time is either the one just before or just after the
    # insertion point of each df1 time in the sorted df2 array.
    last = df2_times.size - 1
    pos = np.searchsorted(df2_times, t1)
    prev_times = df2_times[np.clip(pos - 1, 0, last)]
    next_times = df2_times[np.clip(pos, 0, last)]

    # Explicit int64 keeps the millisecond counts from overflowing on
    # platforms where the default integer is 32 bits wide.
    gap_prev = np.abs(t1 - prev_times).astype('timedelta64[ms]').astype(np.int64)
    gap_next = np.abs(next_times - t1).astype('timedelta64[ms]').astype(np.int64)
    min_diffs = np.minimum(gap_prev, gap_next)
    matched = min_diffs <= tolerance_ms

    # Rows without a valid timestamp can never match; report NaN for them.
    missing = np.isnat(t1)
    if missing.any():
        matched &= ~missing
        min_diffs = np.where(missing, np.nan, min_diffs)

    result['matched_in_df2'] = matched
    result['min_time_diff_ms'] = min_diffs
    return result

# With tolerance=7000 ms (7 s), check which rows of df1_extra_orders can actually be matched in df2
check_df = check_if_really_unmatched(df1_extra_orders, df2, tolerance_ms=7000)

# 统计一下
print(check_df['matched_in_df2'].value_counts())
print(f"其实可以匹配上的数量: {(check_df['matched_in_df2']).sum()} / {len(check_df)}")


In [None]:
df1

In [None]:
df1['Side']

In [None]:
df1.columns

In [None]:
df1[df1['Side'] == 'buy']['slippage'].median()

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-08-01-2025-08-07_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type1.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-09/2025-08-01-2025-08-07_BTC_0.0_500.0_5_True_False_False_0.0_3_ControlGroup.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-08-01-2025-08-07_BTC_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type1.json')

# DynamicHedging

In [None]:
period_label_from_path("/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-08-01-2025-08-07_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type1.json")

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-21/2025-08-18-2025-08-21_ETH_0.0_500.0_60_True_False_False_0.0_3_dynamic_hedging_time_percentile_0.75.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-21/2025-08-18-2025-08-21_ETH_0.0_500.0_60_True_False_False_0.0_3_dynamic_hedging_time_对照.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-21/2025-08-18-2025-08-21_ETH_0.0_500.0_5_True_False_False_0.0_3_dynamic_hedging_time.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-21/2025-08-18-2025-08-21_ETH_0.0_500.0_5_True_False_False_0.0_3_dynamic_hedging_time_对照.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-21/2025-08-18-2025-08-21_ETH_0.0_500.0_60_True_False_False_0.0_3_dynamic_hedging_time_对照.json')

In [None]:
dfs

In [None]:
"/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-08-01-2025-08-07_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type1.json"



In [None]:
import os, re
import pandas as pd

# ====== 你的路径分组 ======
DynamicHedgingTimeType1 = [
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-08-01-2025-08-07_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type1.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-07-01-2025-07-31_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type1.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-06-01-2025-06-30_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type1.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-05-01-2025-05-30_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type1.json",
]
ControlGroup = [
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-08-01-2025-08-07_BTC_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type1.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-07-01-2025-07-31_BTC_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type1.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-06-01-2025-06-30_BTC_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type1.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-09/2025-05-01-2025-05-30_BTC_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type1.json",
]
DynamicHedgingTimeType2 = [
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-09/2025-05-01-2025-05-30_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type2.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-09/2025-06-01-2025-06-30_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type2.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-09/2025-07-01-2025-07-31_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type2.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-09/2025-08-01-2025-08-07_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type2.json",
]
DynamicHedgingTimeEWMAOneSide = [
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-18/2025-05-01-2025-05-30_BTC_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_360301440_type1_EWMA_vol5_EWMA60.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-18/2025-06-01-2025-06-30_BTC_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_360301440_type1_EWMA_vol5_EWMA60.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-18/2025-07-01-2025-07-31_BTC_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_360301440_type1_EWMA_vol5_EWMA60.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-18/2025-08-01-2025-08-07_BTC_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_360301440_type1_EWMA_vol5_EWMA60.json",
]
GROUPS = {
    "DynamicHedgingTimeType1": DynamicHedgingTimeType1,
    "ControlGroup": ControlGroup,
    "DynamicHedgingTimeType2": DynamicHedgingTimeType2,
    "DynamicHedgingTimeEWMAOneSide": DynamicHedgingTimeEWMAOneSide
}
COL_ORDER = ["DynamicHedgingTimeType1", "ControlGroup", "DynamicHedgingTimeType2","DynamicHedgingTimeEWMAOneSide"]

# 从文件名提取月份标签（用起始日期的 YYYY-MM）
def period_label_from_path(p: str) -> str:
    """
    Return the starting month (``YYYY-MM``) encoded at the front of a log file name.

    File names look like ``2025-08-01-2025-08-07_...``: a start date, a dash,
    then an end date. Names using a month-only range (``2025-08-2025-09_...``)
    are accepted as well.

    Args:
        p: Path to the log file; only the base name is inspected.

    Returns:
        The ``YYYY-MM`` of the start date, or ``"Unknown"`` when the base name
        contains no such date range.
    """
    # The day field is optional. Without allowing for it the pattern can never
    # match a "YYYY-MM-DD-YYYY-MM-DD" name, and every file would be labelled
    # "Unknown".
    m = re.search(r"(\d{4}-\d{2})(?:-\d{2})?-\d{4}-\d{2}", os.path.basename(p))
    return m.group(1) if m else "Unknown"

# —— 核心：直接用你的函数 —— #
# 假设函数已在当前会话中可用
# from your_module import analyze_slippage_backtest  # 若需从模块导入，解除注释并替换模块名

period_to_data = {}
for group_name, paths in GROUPS.items():
    for p in paths:
        period = period_label_from_path(p)
        stats = analyze_slippage_backtest(p)  # <—— 直接调用
        # stats 应为 dict
        period_to_data.setdefault(period, {})
        period_to_data[period][group_name] = pd.Series(stats)

# 组装每个月 DataFrame
dfs = {}
for period, data in period_to_data.items():
    df = pd.concat(data, axis=1)
    # 按指定列顺序对齐（若某组缺失则会 KeyError，保险起见 reindex）
    df = df.reindex(columns=COL_ORDER)
    dfs[period] = df

# 最终四个 df（不打印不保存）
df_2025_05 = dfs.get("2025-05")
df_2025_06 = dfs.get("2025-06")
df_2025_07 = dfs.get("2025-07")
df_2025_08 = dfs.get("2025-08")


In [None]:
pd.set_option('display.float_format', '{:.7f}'.format)


In [None]:
df_2025_08

In [None]:
df_2025_07

In [None]:
df_2025_06

In [None]:
df_2025_05

In [None]:
df1 = pd.read_json("/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-19/2025-08-01-2025-08-07_BTC_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_with_truncated_1side_EWMA.json",lines=True)
df1

In [None]:
df1['wait2Lock_interval'].value_counts()

In [None]:
df1.groupby('wait2Lock_interval')['slippage'].mean()

In [None]:
df1[(df1['wait2Lock_interval']==60)&(df1['slippage']<-0.0003)]

In [None]:
df1[(df1['wait2Lock_interval']==60)&(df1['slippage']>=-0.0003)]

In [None]:
df2 = pd.read_json("/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-08-01-2025-08-07_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type1.json",lines=True)
df2['wait2Lock_interval'].value_counts()

In [None]:
df2.groupby('wait2Lock_interval')['slippage'].mean()

In [None]:
import os
import pandas as pd

# ==== 你的路径分组 ====
DynamicHedgingTimeType1 = [
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-08-01-2025-08-07_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type1.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-07-01-2025-07-31_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type1.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-06-01-2025-06-30_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type1.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-05-01-2025-05-30_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type1.json",
]
ControlGroup = [
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-08-01-2025-08-07_BTC_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type1.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-07-01-2025-07-31_BTC_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type1.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-08/2025-06-01-2025-06-30_BTC_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type1.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-09/2025-05-01-2025-05-30_BTC_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type1.json",
]
DynamicHedgingTimeType2 = [
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-09/2025-05-01-2025-05-30_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type2.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-09/2025-06-01-2025-06-30_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type2.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-09/2025-07-01-2025-07-31_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type2.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-09/2025-08-01-2025-08-07_BTC_0.0_500.0_60_True_False_False_0.0_3_DynamicHedgingTime_360301440_shortterm_Type2.json",
]

DynamicHedgingTimeEWMAOneSide = [
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-18/2025-05-01-2025-05-30_BTC_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_360301440_type1_EWMA_vol5_EWMA60.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-18/2025-06-01-2025-06-30_BTC_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_360301440_type1_EWMA_vol5_EWMA60.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-18/2025-07-01-2025-07-31_BTC_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_360301440_type1_EWMA_vol5_EWMA60.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-18/2025-08-01-2025-08-07_BTC_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_360301440_type1_EWMA_vol5_EWMA60.json",
]

DynamicHedgingTimeEWMATwoSide = [
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-18/2025-05-01-2025-05-30_BTC_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_360301440_type1_EWMA_EWMA5_EWMA60.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-18/2025-06-01-2025-06-30_BTC_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_360301440_type1_EWMA_EWMA5_EWMA60.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-18/2025-07-01-2025-07-31_BTC_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_360301440_type1_EWMA_EWMA5_EWMA60.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-18/2025-08-01-2025-08-07_BTC_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_360301440_type1_EWMA_EWMA5_EWMA60.json",
]

DynamicHedgingTimeEWMAoneSideTruncated = [
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-19/2025-08-01-2025-08-07_BTC_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_with_truncated_1side_EWMA.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-19/2025-07-01-2025-07-31_BTC_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_with_truncated_1side_EWMA.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-19/2025-06-01-2025-06-30_BTC_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_with_truncated_1side_EWMA.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-19/2025-05-01-2025-05-30_BTC_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_with_truncated_1side_EWMA.json",
]

GROUPS = {
    "DynamicHedgingTimeType1": DynamicHedgingTimeType1,
    "ControlGroup": ControlGroup,
    "DynamicHedgingTimeType2": DynamicHedgingTimeType2,
    "DynamicHedgingTimeEWMAOneSide": DynamicHedgingTimeEWMAOneSide,
    "DynamicHedgingTimeEWMATwoSide": DynamicHedgingTimeEWMATwoSide,
    "DynamicHedgingTimeEWMAoneSideTruncated": DynamicHedgingTimeEWMAoneSideTruncated
}
COL_ORDER = ["DynamicHedgingTimeType1", "ControlGroup", "DynamicHedgingTimeType2","DynamicHedgingTimeEWMAOneSide","DynamicHedgingTimeEWMATwoSide","DynamicHedgingTimeEWMAoneSideTruncated"]

# 从文件名提取起始月份（不用 re）
def period_label_from_path(path: str) -> str:
    """
    Derive a ``YYYY-MM`` style label from the leading part of a file name.

    The base name is cut at its first underscore, and the first two
    dash-separated fields of that prefix are joined with a dash
    (e.g. ``2025-08-01-2025-08-07_BTC_...`` -> ``2025-08``).

    Args:
        path: Path to the log file; only the base name is inspected.

    Returns:
        The joined label, or ``"Unknown"`` when the prefix contains no dash.
    """
    date_range = os.path.basename(path).partition("_")[0]
    head, dash, tail = date_range.partition("-")
    if not dash:
        # No dash at all means there are fewer than two fields to join.
        return "Unknown"
    second = tail.partition("-")[0]
    return f"{head}-{second}"

# —— 核心：直接用你的函数 —— #
# 假设 analyze_slippage_backtest 已在当前环境中可用
period_to_data = {}
for group_name, paths in GROUPS.items():
    for p in paths:
        period = period_label_from_path(p)
        stats = analyze_slippage_backtest(p)  # 返回 dict
        period_to_data.setdefault(period, {})
        period_to_data[period][group_name] = pd.Series(stats)

# 组装每个月 DataFrame（列按要求顺序）
dfs_by_period = {}
for period, data in period_to_data.items():
    df = pd.concat(data, axis=1)
    df = df.reindex(columns=COL_ORDER)  # 若某组缺字段则为 NaN
    dfs_by_period[period] = df

# 最终四个 df（不打印不保存）
df_2025_05 = dfs_by_period.get("2025-05")
df_2025_06 = dfs_by_period.get("2025-06")
df_2025_07 = dfs_by_period.get("2025-07")
df_2025_08 = dfs_by_period.get("2025-08")


# 过去四个小时大趋势

In [None]:
df_2025_08

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-11/2025-08-01-2025-08-06_BTC_0.0_500.0_5_True_False_True_-0.01_14400_4h_filter_price.json')

In [None]:
import os
import pandas as pd

# === 你的三组路径 ===
ControlGroup = [
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-11/2025-08-01-2025-08-07_BTC_0.0_500.0_5_True_False_False_-0.01_14400_ControlGroup.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-11/2025-07-01-2025-07-31_BTC_0.0_500.0_5_True_False_False_-0.01_14400_ControlGroup.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-11/2025-06-01-2025-06-30_BTC_0.0_500.0_5_True_False_False_-0.01_14400_ControlGroup.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-11/2025-05-01-2025-05-30_BTC_0.0_500.0_5_True_False_False_-0.01_14400_ControlGroup.json",
]
Exp_m0p01 = [
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-11/2025-08-01-2025-08-07_BTC_0.0_500.0_5_True_False_True_-0.01_14400_4h_filter_price.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-11/2025-07-01-2025-07-31_BTC_0.0_500.0_5_True_False_True_-0.01_14400_4h_filter_price.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-11/2025-06-01-2025-06-30_BTC_0.0_500.0_5_True_False_True_-0.01_14400_4h_filter_price.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-11/2025-05-01-2025-05-30_BTC_0.0_500.0_5_True_False_True_-0.01_14400_4h_filter_price.json",
]
Exp_m0p005 = [
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-11/2025-08-01-2025-08-07_BTC_0.0_500.0_5_True_False_True_-0.005_14400_4h_filter_price.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-11/2025-07-01-2025-07-31_BTC_0.0_500.0_5_True_False_True_-0.005_14400_4h_filter_price.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-11/2025-06-01-2025-06-30_BTC_0.0_500.0_5_True_False_True_-0.005_14400_4h_filter_price.json",
    "/Users/rayxu/Downloads/nuts_am/log/BTC/25-08-11/2025-05-01-2025-05-30_BTC_0.0_500.0_5_True_False_True_-0.005_14400_4h_filter_price.json",
]

GROUPS = {
    "Exp(-0.01)": Exp_m0p01,
    "ControlGroup": ControlGroup,
    "Exp(-0.005)": Exp_m0p005,
}
COL_ORDER = ["Exp(-0.01)", "ControlGroup", "Exp(-0.005)"]

# —— 不用 re：从文件名取起始月份 YYYY-MM —— #
def period_label_from_path(path: str) -> str:
    """
    Return the first seven characters of the file name's leading token.

    The base name is cut at its first underscore; for names such as
    ``2025-08-01-2025-08-07_BTC_...`` the first seven characters of that
    prefix are the starting month ``YYYY-MM``. The prefix is not validated
    as a date.

    Args:
        path: Path to the log file; only the base name is inspected.

    Returns:
        The seven-character label, or ``"Unknown"`` when the prefix is shorter
        than seven characters.
    """
    prefix, _, _ = os.path.basename(path).partition("_")
    if len(prefix) < 7:
        return "Unknown"
    return prefix[:7]

# —— 主逻辑：直接用你的函数 —— #
def build_three_group_monthly_tables():
    """
    Assemble one comparison table per month from the paths listed in ``GROUPS``.

    Each path is labelled with ``period_label_from_path`` and analysed with
    ``analyze_slippage_backtest``; the result is wrapped in a ``pd.Series`` and
    stored under its month and group name. For every month the series are
    placed side by side, one column per group, in ``COL_ORDER`` order (groups
    with no data for that month become all-NaN columns).

    Returns:
        A 5-tuple: the tables for "2025-05", "2025-06", "2025-07" and
        "2025-08" (``None`` for a month that never occurred), followed by the
        full ``{period: DataFrame}`` dictionary.
    """
    stats_by_period = {}
    for label, file_list in GROUPS.items():
        for file_path in file_list:
            month = period_label_from_path(file_path)
            summary = analyze_slippage_backtest(file_path)
            stats_by_period.setdefault(month, {})[label] = pd.Series(summary)

    # One DataFrame per month, columns pinned to the configured order
    tables = {
        month: pd.concat(columns, axis=1).reindex(columns=COL_ORDER)
        for month, columns in stats_by_period.items()
    }

    wanted = ("2025-05", "2025-06", "2025-07", "2025-08")
    return (*(tables.get(month) for month in wanted), tables)

# 用法：
df_2025_05, df_2025_06, df_2025_07, df_2025_08, dfs_all = build_three_group_monthly_tables()
# 然后直接使用这四个 df 即可（不保存不打印）


In [None]:
df_2025_06

In [None]:
_,_,_,df = analyze_slippage('/Users/rayxu/Downloads/order.arbitrage_soon_okx_binance_01_2.csv')

In [None]:
df['Createtime'] = pd.to_datetime(df['Createtime'])
df[df['Createtime'] >= pd.to_datetime('2025-08-12 16:07:00')]


In [None]:
df['SR'].mean()

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-13/2025-08-01-2025-08-06_ETH_0.0_500.0_5_True_False_False_-0.01_14400_DynamicHedgingTime_36030_Type1.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-13/2025-08-01-2025-08-06_ETH_0.0_500.0_5_True_False_False_-0.01_14400_.json')

In [None]:
stats1 = analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-13/2025-08-07-2025-08-13_ETH_0.0_500.0_5_True_False_False_-0.01_14400_.json')
stats2 = analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-13/2025-08-07-2025-08-13_ETH_0.0_500.0_60_True_False_False_-0.01_14400_.json')
stats3 = analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-13/2025-08-07-2025-08-13_ETH_0.0_500.0_5_True_False_False_-0.01_14400_DynamicHedgingTime_36030_Type1.json')
stats4 = analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-14/2025-08-07-2025-08-13_ETH_0.0_500.0_60_True_False_True_0.0_3_DynamicHedging_with_filter.json')
stats5 = analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-18/2025-08-07-2025-08-13_ETH_0.0_500.0_5_True_False_False_0.0_3_Test_DynamicHedgingTime_EWMA_vol5_EWMA60.json')
stats6 = analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-18/2025-08-07-2025-08-13_ETH_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_with_truncated_1side_EWMA.json')


In [None]:
# stats5 = analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-18/2025-08-07-2025-08-13_ETH_0.0_500.0_5_True_False_False_0.0_3_Test_DynamicHedgingTime_EWMA.json')


In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-18/2025-08-07-2025-08-13_ETH_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_360301440_type1_EWMA_EWMA5_EWMA60.json')

In [None]:
# truncated 2 side EWMA
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-18/2025-08-07-2025-08-13_ETH_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_360301440_type1_vol5_EWMA5_EWMA60_halflife_900.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-18/2025-08-07-2025-08-13_ETH_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_with_truncated_1side_EWMA.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-18/2025-08-07-2025-08-13_ETH_0.0_500.0_5_True_False_False_0.0_3_DynamicHedgingTime_with_truncated_1side_EWMA_1.05_0.95_5s.json')

In [None]:
df = pd.DataFrame([stats1, stats2, stats3,stats4,stats5,stats6]).transpose()
df.columns = ['5s', '60s', '动态切换','动态切换+filter','动态切换+EWMA一边','动态切换 + truncated EWMA']
df

# 动态对冲实盘结果

In [None]:
_,_,_,df2 = analyze_slippage('/Users/rayxu/Downloads/order.eth_okx_binance_08_2 (4).csv',starttime='2025-07-22 00:00:00',endtime='2025-08-25 03:30:00')

In [None]:
df['Createtime'] = pd.to_datetime(df['Createtime'])

In [None]:
df[(df['Createtime']>=pd.to_datetime('2025-08-21 01:40:00')) & (df['Createtime']<=pd.to_datetime('2025-08-21 01:49:00'))].sort_values(by = 'Createtime')

In [None]:
_,_,_,df = analyze_slippage('/Users/rayxu/Downloads/order.eth_okx_binance_09_2 (7).csv',starttime='2025-07-22 00:00:00',endtime='2025-08-25 03:30:00')
df

In [None]:
df['hedgeType_timeParam'].value_counts()

In [None]:
df_merged

In [None]:
# 把df和df2 拼起来（用merge_asof),时间容忍度设成100ms，然后筛选hedgeType_timeParam 为true_1m0s或者false_1m0s，统计一下他们的slippage的差，然后看在时序上，是不是集中在某一段时间，还是比较均匀？
import pandas as pd

# 确保 Createtime 列为 datetime 类型
df['Createtime'] = pd.to_datetime(df['Createtime'])
df2['Createtime'] = pd.to_datetime(df2['Createtime'])

# merge_asof 只能用于排序后的 DataFrame，且 on 的列必须为 datetime 或 numeric
# 但还要确保 Createtime 没有重复值，否则 merge_asof 会报错
df_sorted = df.sort_values('Createtime').drop_duplicates(subset=['Createtime'])
df2_sorted = df2.sort_values('Createtime').drop_duplicates(subset=['Createtime'])

# merge_asof 拼接，时间容忍度 100ms
df_merged = pd.merge_asof(
    df_sorted,
    df2_sorted,
    on='Createtime',
    direction='nearest',
    tolerance=pd.Timedelta('100ms'),
    suffixes=('_dynamic', '_fixed')
)

# 2. 筛选 hedgeType_timeParam_dynamic 为 true_1m0s 或 false_1m0s
mask = df_merged['hedgeType_timeParam_dynamic'].isin(['true_1m0s', 'false_1m0s'])
filtered = df_merged[mask].copy()

# 3. 统计 slippage 的差
filtered['slippage_diff'] = filtered['slippage_dynamic'] - filtered['slippage_fixed']

# 4. 查看 slippage_diff 的描述性统计和分位数
desc = filtered['slippage_diff'].describe()
percentiles = filtered['slippage_diff'].quantile([0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9])
print(desc)
print("分位数:")
for p, v in percentiles.items():
    print(f"{int(p*100)}%: {v}")

# 5. 按时间画出 slippage_diff 的时序分布
import matplotlib.pyplot as plt
plt.figure(figsize=(15,4))
plt.plot(filtered['Createtime'], filtered['slippage_diff'], marker='.', linestyle='none', alpha=0.5)
plt.xlabel('Createtime')
plt.ylabel('slippage_diff')
plt.title('Slippage Difference Over Time (true_1m0s/false_1m0s)')
plt.show()

In [None]:
filtered[filtered['slippage_diff']>0.002][['Createtime','ESR_dynamic','hedgeType_timeParam_dynamic','slippage_dynamic','slippage_fixed','slippage_diff']]

In [None]:
# Join df and df2 on Createtime with merge_asof (100 ms tolerance), keep only
# the selected hedgeType_timeParam values, then look at the distribution of
# the slippage difference and whether it clusters in time or is spread evenly.
import pandas as pd

# Make sure Createtime is a datetime column on both sides
df['Createtime'] = pd.to_datetime(df['Createtime'])
df2['Createtime'] = pd.to_datetime(df2['Createtime'])

# merge_asof needs both frames sorted by the key, and the key must be
# datetime or numeric.
# NOTE(review): drop_duplicates keeps only the first row per Createtime, so
# orders sharing a timestamp are discarded before the join — confirm that
# this is intended.
df_sorted = df.sort_values('Createtime').drop_duplicates(subset=['Createtime'])
df2_sorted = df2.sort_values('Createtime').drop_duplicates(subset=['Createtime'])

# Nearest-time join; pairs further apart than 100 ms stay unmatched
df_merged = pd.merge_asof(
    df_sorted,
    df2_sorted,
    on='Createtime',
    direction='nearest',
    tolerance=pd.Timedelta('100ms'),
    suffixes=('_dynamic', '_fixed')
)

# 2. Keep only the hedgeType_timeParam_dynamic values of interest.
# Other selections used with this cell:
#   ['true_1m0s', 'false_1m0s'], ['true_1m0s'], ['true_5s', 'false_5s']
selected_types = ['false_1m0s']
mask = df_merged['hedgeType_timeParam_dynamic'].isin(selected_types)
filtered = df_merged[mask].copy()

# 3. Slippage difference between the two sides of the join
filtered['slippage_diff'] = filtered['slippage_dynamic'] - filtered['slippage_fixed']
# Optional outlier cut (disabled):
# filtered = filtered[filtered['slippage_diff']<0.002]

# 4. Descriptive statistics and deciles of slippage_diff
desc = filtered['slippage_diff'].describe()
percentiles = filtered['slippage_diff'].quantile([0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9])
print(desc)
print("分位数:")
for p, v in percentiles.items():
    print(f"{int(p*100)}%: {v}")

# 5. Scatter of slippage_diff over time. The title is built from the active
# selection so it cannot drift out of sync with the mask above.
import matplotlib.pyplot as plt
plt.figure(figsize=(15,4))
plt.plot(filtered['Createtime'], filtered['slippage_diff'], marker='.', linestyle='none', alpha=0.5)
plt.xlabel('Createtime')
plt.ylabel('slippage_diff')
plt.title(f"Slippage Difference Over Time ({'/'.join(selected_types)})")
plt.show()

In [None]:
# slippage_diff 的 频率分布呢？
filtered['slippage_diff'].hist(bins=100)

In [None]:
df[1114:]

In [None]:
df['hedgeType_timeParam'].value_counts()

In [None]:
df[df['hedgeType_timeParam'] == 'false_5s']

In [None]:
df['hedgeType_timeParam'].value_counts()

In [None]:
4/300

In [None]:
df2['TimeUsed'].describe()

In [None]:
df['TimeUsed'].describe()

In [None]:
_,_,_,df = analyze_slippage('/Users/rayxu/Downloads/order.eth_okx_binance_09_2.csv')

In [None]:
df

In [None]:
df[df.slippage <-0.001]

In [None]:
df

In [None]:

import json
import pandas as pd
import numpy as np

# 读取文件
with open("/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-14/price_and_vol_ratio", "r") as f:
    data = json.load(f)

# 转成 DataFrame
df = pd.DataFrame(data)

# 把 time 转成 pandas 时间类型，自动推断格式（mixed），以避免 ValueError
df['time'] = pd.to_datetime(df['time'], format='mixed', errors='coerce')

df['vol_ratio_normalized'] = df['vol_ratio'] / np.sqrt(12)
df['vol_ratio_normalized'].describe()
import plotly.graph_objects as go

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=df['time'],
    y=df['vol_ratio_normalized'],
    mode='lines',
    name='vol_ratio_normalized'
))
fig.update_layout(
    title='vol_ratio_normalized over time',
    xaxis_title='Time',
    yaxis_title='vol_ratio_normalized',
    width=2000,   # 放大宽度
    height=800    # 放大高度
)
fig.show()

In [None]:
df[1000:]['vol_5s'].describe()

# 小时调仓和每日调仓对比（+PNL图）

In [None]:
import pandas as pd
import plotly.express as px

def plot_pnl_from_json(json_path: str):
    """
    Plot the ``pnl`` column of a JSON-lines file against its ``time`` column.

    Every row of the file is plotted. An earlier revision dropped rows where
    pnl == cash or pnl < 0 (and rows with a non-zero pos); that filtering is
    disabled, so the title no longer claims it.

    Args:
        json_path: Path to a file with one JSON object per line, containing at
            least ``time`` and ``pnl`` fields.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # One JSON object per line
    df = pd.read_json(json_path, lines=True)

    # Parse timestamps and put the rows in time order before drawing the line
    df['time'] = pd.to_datetime(df['time'])
    df = df.sort_values('time')

    fig = px.line(
        df,
        x='time',
        y='pnl',
        title="PnL 随时间变化",
        labels={"time": "时间", "pnl": "PnL"}
    )
    fig.update_traces(mode="lines+markers")
    fig.show()

# 用法示例
# plot_pnl_from_json("/Users/rayxu/Downloads/example.json")


In [None]:
plot_pnl_from_json('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-19/2025-07-01-2025-08-19_ETH_0.0_500.0_5_True_False_False_0.0_3_daily_update.json')

In [None]:
import plotly.graph_objects as go

import matplotlib.pyplot as plt

def plot_trade_net_curve(json_path, credit_rate=0.00005, turnover_unit=1_000_000):
    """
    Plot the net-value curve and running position of a JSON-lines trade record.

    Each line of the file is one record with at least ``time``, ``type``,
    ``volume`` and ``price``. Rows typed ``'Maker_ask'`` get direction -1,
    every other row +1. From that the function derives

    * ``pos``: running sum of ``volume * flag``;
    * ``net``: running sum of ``-volume * price * flag``, plus ``pos`` valued
      at the row's own price, plus the running sum of
      ``volume * price * credit_rate``.

    ``net`` is drawn as a solid line on the left axis and ``pos`` as a dashed
    line on a second y-axis (no markers). The title shows the turnover,
    defined as total traded notional / ``turnover_unit`` / days covered.

    Args:
        json_path: Path to the JSON-lines trade record.
        credit_rate: Fraction of each row's notional added to ``net``
            (presumably a maker rebate -- confirm). Defaults to the value
            that used to be hard-coded, 0.00005.
        turnover_unit: Notional used to scale the daily turnover. Defaults to
            the value that used to be hard-coded, 1,000,000.
    """
    trade_record = pd.read_json(json_path, lines=True)
    trade_record['time'] = pd.to_datetime(trade_record['time'], errors='coerce')
    trade_record.set_index('time', inplace=True)

    # Signed direction: 'Maker_ask' rows count as -1, every other type as +1.
    trade_record['flag'] = 1
    trade_record.loc[trade_record['type'] == 'Maker_ask', 'flag'] = -1

    notional = trade_record['volume'] * trade_record['price']
    trade_record['pos'] = (trade_record['volume'] * trade_record['flag']).cumsum()
    trade_record['net'] = (
        (-notional * trade_record['flag']).cumsum()
        + trade_record['pos'] * trade_record['price']
        + (notional * credit_rate).cumsum()
    )

    # Total traded notional over the whole record.
    trade_record['turnover'] = notional
    total_turnover = trade_record['turnover'].sum()

    # Days covered by the record, floored at one day. The comparison form
    # also maps a NaN span (all timestamps unparseable) to the one-day floor.
    if len(trade_record) > 0:
        span = trade_record.index.max() - trade_record.index.min()
        elapsed_days = span.total_seconds() / 86400
        total_days = elapsed_days if elapsed_days > 1 else 1
    else:
        total_days = 1

    turnover_rate = total_turnover / turnover_unit / total_days

    fig, ax1 = plt.subplots(figsize=(12, 6))

    # Net value on the left axis: plain line, no markers.
    ax1.plot(trade_record.index, trade_record['net'], linestyle='-', label='Net Value', color='tab:blue')
    ax1.set_xlabel("Time")
    ax1.set_ylabel("PnL", color='tab:blue')
    ax1.tick_params(axis='y', labelcolor='tab:blue')
    ax1.legend(loc='upper left')
    ax1.grid(True)

    # Position on a second y-axis sharing the same x-axis: dashed line.
    ax2 = ax1.twinx()
    ax2.plot(trade_record.index, trade_record['pos'], linestyle='--', label='Pos', color='tab:orange')
    ax2.set_ylabel("Pos", color='tab:orange')
    ax2.tick_params(axis='y', labelcolor='tab:orange')
    ax2.legend(loc='upper right')

    plt.title(f"Pnl Curve | Turnover: {turnover_rate:.4f} ")
    plt.tight_layout()
    plt.show()

# 用法示例
#plot_trade_net_curve('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-19/2025-07-01-2025-08-19_ETH_0.0_500.0_5_True_False_False_0.0_3_daily_update.json')

In [None]:
plot_pnl_from_json('/Users/rayxu/Downloads/nuts_am/log/BTC/25-07-18/2025-07-01-2025-07-13_1752883486.132423.json')

In [None]:
plot_trade_net_curve('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-20/2025-07-01-2025-08-19_ETH_0.0_500.0_5_True_False_False_0.0_3_3_day_update.json')

In [None]:
plot_trade_net_curve('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-19/2025-07-01-2025-08-19_ETH_0.0_500.0_5_True_False_False_0.0_3_daily_update.json')

In [None]:
plot_trade_net_curve('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-19/2025-07-01-2025-08-19_ETH_0.0_500.0_5_True_False_False_0.0_3_hourly_update.json')

In [None]:
import plotly.graph_objs as go

# Load the hourly-update trade record (one JSON object per line) into the
# global `trade_record`, which later cells read.
trade_record = pd.read_json('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-19/2025-07-01-2025-08-19_ETH_0.0_500.0_5_True_False_False_0.0_3_hourly_update.json', lines=True)
trade_record['time'] = pd.to_datetime(trade_record['time'], errors='coerce')
trade_record.set_index('time', inplace=True)
# Signed direction: 'Maker_ask' rows are -1, every other type is +1.
trade_record['flag'] = 1
trade_record.loc[trade_record['type'] == 'Maker_ask', 'flag'] = -1
# Running signed volume.
trade_record['pos'] = (trade_record['volume'] * trade_record['flag']).cumsum()
# net = running sum of -volume*price*flag, plus pos valued at the row's price,
# plus the running sum of 0.00005 * notional.
trade_record['net'] = (
    (-trade_record['volume'] * trade_record['price'] * trade_record['flag']).cumsum()
    + trade_record['pos'] * trade_record['price']
    + (trade_record['volume'] * trade_record['price'] * 0.00005).cumsum()
)
trade_record['net_diff'] = trade_record['net'].diff()
# Same expression as 'pos' above, stored under a second column name.
trade_record['net_pos'] = (trade_record['volume'] * trade_record['flag']).cumsum()

# Interactive line chart of net_pos over time.
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=trade_record.index,
    y=trade_record['net_pos'],
    mode='lines',
    name='net_pos'
))
fig.update_layout(
    title='net_pos vs Time',
    xaxis_title='Time',
    yaxis_title='net_pos',
    template='plotly_white',
    width=1600,   # enlarged figure width
    height=800    # enlarged figure height
)
fig.show()

In [None]:
trade_record['net_pos'][95000:]

In [None]:
trade_record['price'].plot()

In [None]:
trade_record['net'].tail(1000)

In [None]:
# import matplotlib.pyplot as plt

# trade_record = pd.read_json('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-19/2025-07-01-2025-08-19_ETH_0.0_500.0_5_True_False_False_0.0_3_daily_update.json', lines=True)
# trade_record['time'] = pd.to_datetime(trade_record['time'], errors='coerce')
# plt.figure(figsize=(12, 6))
# plt.plot(trade_record.loc[trade_record['type'] == 'Maker_ask', 'time'], trade_record.loc[trade_record['type'] == 'Maker_ask', 'pos'], label='pos')
# plt.xlabel('Time')
# plt.ylabel('Pos')
# plt.title('Pos vs Time (Maker_ask)')
# plt.legend()
# plt.grid(True)
# plt.show()

In [None]:
trade_record

In [None]:
trade_record['pos'].head(20)

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Paths of the three JSON-lines trade records to compare
# (daily, hourly and 3-day update runs, per the file names).
json_path1 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-19/2025-07-01-2025-08-19_ETH_0.0_500.0_5_True_False_False_0.0_3_daily_update.json'
json_path2 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-19/2025-07-01-2025-08-19_ETH_0.0_500.0_5_True_False_False_0.0_3_hourly_update.json'
json_path3 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-20/2025-07-01-2025-08-19_ETH_0.0_500.0_5_True_False_False_0.0_3_3_day_update.json'
# Load the data (one JSON object per line).
trade_record1 = pd.read_json(json_path1, lines=True)
trade_record2 = pd.read_json(json_path2, lines=True)
trade_record3 = pd.read_json(json_path3, lines=True)

# Parse 'time' to datetimes; unparseable values become NaT.
trade_record1['time'] = pd.to_datetime(trade_record1['time'], errors='coerce')
trade_record2['time'] = pd.to_datetime(trade_record2['time'], errors='coerce')
trade_record3['time'] = pd.to_datetime(trade_record3['time'], errors='coerce')

# Keep only rows strictly before 2025-08-13 10:00.
cutoff = pd.Timestamp('2025-08-13 10:00:00')
trade_record1 = trade_record1[trade_record1['time'] < cutoff].copy()
trade_record2 = trade_record2[trade_record2['time'] < cutoff].copy()
trade_record3 = trade_record3[trade_record3['time'] < cutoff].copy()

# Signed direction: 'Maker_ask' rows are -1, every other type is +1.
# NOTE: the loop variable `df` stays bound to the last frame after each loop.
for df in [trade_record1, trade_record2, trade_record3]:
    df['flag'] = 1
    df.loc[df['type'] == 'Maker_ask', 'flag'] = -1

# Running signed volume.
for df in [trade_record1, trade_record2, trade_record3]:
    df['pos'] = (df['volume'] * df['flag']).cumsum()

# net = running sum of -volume*price*flag, plus pos valued at the row's price,
# plus the running sum of 0.00005 * notional.
for df in [trade_record1, trade_record2, trade_record3]:
    df['net'] = (
        (-df['volume'] * df['price'] * df['flag']).cumsum()
        + df['pos'] * df['price']
        + (df['volume'] * df['price'] * 0.00005).cumsum()
    )

# 计算年化收益率和换手率
def calc_stats(df, principal):
    """
    Summarise a trade record as (annualised return, daily turnover, days).

    Only rows with ``volume > 0`` are used. The time span runs from the first
    to the last such row; a zero span is replaced by one hour (1/24 day) so
    the divisions stay defined. The return annualises the last ``net`` value
    relative to ``principal``; turnover is traded notional per unit of
    ``principal`` per day. Returns ``(0, 0, 0)`` when no row has volume.
    """
    traded = df[df['volume'] > 0]
    if traded.empty:
        return 0, 0, 0

    first_ts, last_ts = traded['time'].iloc[0], traded['time'].iloc[-1]
    span_days = (last_ts - first_ts).total_seconds() / 86400
    if span_days == 0:
        # Guard against division by zero: treat the span as one hour.
        span_days = 1/24

    final_net = traded['net'].iloc[-1]
    notional_sum = (traded['volume'] * traded['price']).sum()
    return (
        final_net / principal / span_days * 365,
        notional_sum / principal / span_days,
        span_days,
    )

# Capital base used to normalise return and turnover.
principal = 2_000_000

# (annualised return, daily turnover, days covered) for each run.
ann_return1, turnover1, days1 = calc_stats(trade_record1, principal)
ann_return2, turnover2, days2 = calc_stats(trade_record2, principal)
ann_return3, turnover3, days3 = calc_stats(trade_record3, principal)

# Plot the three net curves on one figure; each legend entry carries that
# run's annualised return and turnover.
plt.figure(figsize=(12, 6))
plt.plot(trade_record1['time'], trade_record1['net'], label=f'daily update\nret: {ann_return1:.2%}\nturnover: {turnover1:.2f}', color='tab:blue')
plt.plot(trade_record2['time'], trade_record2['net'], label=f'hourly update\nret: {ann_return2:.2%}\nturnover: {turnover2:.2f}', color='tab:orange')
plt.plot(trade_record3['time'], trade_record3['net'], label=f'3 day update\nret: {ann_return3:.2%}\nturnover: {turnover3:.2f}', color='tab:green')
plt.xlabel('Time')
plt.ylabel('PnL')
plt.title('PnL Curve before 2025-08-13 10:00\n(Initial Capital 2000000)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# 过滤OIR


In [None]:
pd.read_csv('/Users/rayxu/Downloads/order.btc_okx_binance_08_2.csv')

In [None]:
analyze_slippage('/Users/rayxu/Downloads/order.btc_okx_binance_08_2.csv')

In [None]:
pd.read_csv('/Users/rayxu/Downloads/order.btc_okx_binance_09_2.csv')

In [None]:
analyze_slippage('/Users/rayxu/Downloads/order.btc_okx_binance_09_2.csv',starttime=pd.to_datetime('2025-09-05 00:00:00'))

In [None]:
# 反向过滤
stats1 = analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-09-03/2025-08-01-2025-08-31_ETH_0.0_2000.0_5_True_False_False_-0.0006_30_反向过滤OIR_0.8.json')

In [None]:
# 八月整对照
stats2 = analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-09-02/2025-08-01-2025-08-31_ETH_0.0_2000.0_5_True_False_False_-0.0006_30_对照组.json')

In [None]:
# 8月整
stats3 = analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-09-02/2025-08-01-2025-08-31_ETH_0.0_2000.0_5_True_False_False_-0.0006_30_反向过滤OIR_0.5_只取恶劣情况.json')

In [None]:
df = pd.concat([pd.DataFrame([stats1]).T, pd.DataFrame([stats2]).T, pd.DataFrame([stats3]).T],axis=1)
df.columns = ['反向过滤OIR_0.8', '八月整对照', '过滤OIR_0.5']
df

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Paths of the three JSON-lines trade records to compare: the control run and
# two OIR-filter runs (per the file names).
json_path1 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-09-02/2025-08-01-2025-08-31_ETH_0.0_2000.0_5_True_False_False_-0.0006_30_对照组.json'
# json_path2 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-29/2025-08-07-2025-08-13_ETH_0.0_2000.0_5_True_False_False_-0.0006_30_test_反向过滤最近一个tick的_oir_0.8.json'
json_path2 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-09-02/2025-08-01-2025-08-31_ETH_0.0_2000.0_5_True_False_False_-0.0006_30_反向过滤OIR_0.5_只取恶劣情况.json'
json_path3 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-09-03/2025-08-01-2025-08-31_ETH_0.0_2000.0_5_True_False_False_-0.0006_30_反向过滤OIR_0.8.json'
# Load the data (one JSON object per line).
trade_record1 = pd.read_json(json_path1, lines=True)
trade_record2 = pd.read_json(json_path2, lines=True)
trade_record3 = pd.read_json(json_path3, lines=True)

# Parse 'time' to datetimes; unparseable values become NaT.
trade_record1['time'] = pd.to_datetime(trade_record1['time'], errors='coerce')
trade_record2['time'] = pd.to_datetime(trade_record2['time'], errors='coerce')
trade_record3['time'] = pd.to_datetime(trade_record3['time'], errors='coerce')

# Disabled: keep only rows before 2025-08-13 10:00.
# cutoff = pd.Timestamp('2025-08-13 10:00:00')
# trade_record1 = trade_record1[trade_record1['time'] < cutoff].copy()
# trade_record2 = trade_record2[trade_record2['time'] < cutoff].copy()
# trade_record3 = trade_record3[trade_record3['time'] < cutoff].copy()

# Signed direction: 'Maker_ask' rows are -1, every other type is +1.
# NOTE: the loop variable `df` stays bound to the last frame after each loop.
for df in [trade_record1, trade_record2, trade_record3]:
    df['flag'] = 1
    df.loc[df['type'] == 'Maker_ask', 'flag'] = -1

# Running signed volume.
for df in [trade_record1, trade_record2, trade_record3]:
    df['pos'] = (df['volume'] * df['flag']).cumsum()

# net = running sum of -volume*price*flag, plus pos valued at the row's price,
# plus the running sum of 0.00005 * notional.
for df in [trade_record1, trade_record2, trade_record3]:
    df['net'] = (
        (-df['volume'] * df['price'] * df['flag']).cumsum()
        + df['pos'] * df['price']
        + (df['volume'] * df['price'] * 0.00005).cumsum()
    )

# 计算年化收益率和换手率
def calc_stats(df, principal):
    """
    Summarise a trade record as (annualised return, daily turnover, days).

    Only rows with ``volume > 0`` are used. The time span runs from the first
    to the last such row; a zero span is replaced by one hour (1/24 day) so
    the divisions stay defined. The return annualises the last ``net`` value
    relative to ``principal``; turnover is traded notional per unit of
    ``principal`` per day. Returns ``(0, 0, 0)`` when no row has volume.
    """
    traded = df[df['volume'] > 0]
    if traded.empty:
        return 0, 0, 0

    first_ts, last_ts = traded['time'].iloc[0], traded['time'].iloc[-1]
    span_days = (last_ts - first_ts).total_seconds() / 86400
    if span_days == 0:
        # Guard against division by zero: treat the span as one hour.
        span_days = 1/24

    final_net = traded['net'].iloc[-1]
    notional_sum = (traded['volume'] * traded['price']).sum()
    return (
        final_net / principal / span_days * 365,
        notional_sum / principal / span_days,
        span_days,
    )

# Capital base used to normalise return and turnover.
principal = 16_000_000

# (annualised return, daily turnover, days covered) for each run.
ann_return1, turnover1, days1 = calc_stats(trade_record1, principal)
ann_return2, turnover2, days2 = calc_stats(trade_record2, principal)
ann_return3, turnover3, days3 = calc_stats(trade_record3, principal)

# Plot the three net curves on one figure; each legend entry carries that
# run's annualised return and turnover.
plt.figure(figsize=(12, 6))
plt.plot(trade_record1['time'], trade_record1['net'], label=f'Without OIR\nret: {ann_return1:.2%}\nturnover: {turnover1:.2f}', color='tab:blue')
plt.plot(trade_record2['time'], trade_record2['net'], label=f'With OIR Type1\nret: {ann_return2:.2%}\nturnover: {turnover2:.2f}', color='tab:orange')
plt.plot(trade_record3['time'], trade_record3['net'], label=f'With OIR Type2\nret: {ann_return3:.2%}\nturnover: {turnover3:.2f}', color='tab:green')
plt.xlabel('Time')
plt.ylabel('PnL')
plt.title('PnL Curve \n(Initial Capital 16000000)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-09-02/2025-08-07-2025-08-12_ETH_0.0_2000.0_5_True_False_False_-0.0006_30_反向过滤OIR_0.8_正确版.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-30/2025-08-07-2025-08-12_ETH_0.0_2000.0_5_True_False_False_-0.0006_30_反向过滤OIR_0.5_只取恶劣情况.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-29/2025-08-07-2025-08-12_ETH_0.0_2000.0_5_True_False_False_-0.0006_30_OIR_ETH对照.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-29/2025-08-07-2025-08-12_ETH_0.0_2000.0_5_True_False_False_-0.0006_30_反向过滤OIR_0.5.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-29/2025-08-07-2025-08-13_ETH_0.0_2000.0_5_True_False_False_-0.0006_30_test_反向过滤最近一个tick的_oir_0.8.json')

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Paths of the three JSON-lines trade records to compare: the control run and
# two OIR-filter runs (per the file names).
json_path1 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-29/2025-08-07-2025-08-12_ETH_0.0_2000.0_5_True_False_False_-0.0006_30_OIR_ETH对照.json'
# json_path2 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-29/2025-08-07-2025-08-13_ETH_0.0_2000.0_5_True_False_False_-0.0006_30_test_反向过滤最近一个tick的_oir_0.8.json'
json_path2 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-30/2025-08-07-2025-08-12_ETH_0.0_2000.0_5_True_False_False_-0.0006_30_反向过滤OIR_0.5_只取恶劣情况.json'
json_path3 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-09-02/2025-08-07-2025-08-12_ETH_0.0_2000.0_5_True_False_False_-0.0006_30_反向过滤OIR_0.8_正确版.json'
# Load the data (one JSON object per line).
trade_record1 = pd.read_json(json_path1, lines=True)
trade_record2 = pd.read_json(json_path2, lines=True)
trade_record3 = pd.read_json(json_path3, lines=True)

# Parse 'time' to datetimes; unparseable values become NaT.
trade_record1['time'] = pd.to_datetime(trade_record1['time'], errors='coerce')
trade_record2['time'] = pd.to_datetime(trade_record2['time'], errors='coerce')
trade_record3['time'] = pd.to_datetime(trade_record3['time'], errors='coerce')

# Disabled: keep only rows before 2025-08-13 10:00.
# cutoff = pd.Timestamp('2025-08-13 10:00:00')
# trade_record1 = trade_record1[trade_record1['time'] < cutoff].copy()
# trade_record2 = trade_record2[trade_record2['time'] < cutoff].copy()
# trade_record3 = trade_record3[trade_record3['time'] < cutoff].copy()

# Signed direction: 'Maker_ask' rows are -1, every other type is +1.
# NOTE: the loop variable `df` stays bound to the last frame after each loop.
for df in [trade_record1, trade_record2, trade_record3]:
    df['flag'] = 1
    df.loc[df['type'] == 'Maker_ask', 'flag'] = -1

# Running signed volume.
for df in [trade_record1, trade_record2, trade_record3]:
    df['pos'] = (df['volume'] * df['flag']).cumsum()

# net = running sum of -volume*price*flag, plus pos valued at the row's price,
# plus the running sum of 0.00005 * notional.
for df in [trade_record1, trade_record2, trade_record3]:
    df['net'] = (
        (-df['volume'] * df['price'] * df['flag']).cumsum()
        + df['pos'] * df['price']
        + (df['volume'] * df['price'] * 0.00005).cumsum()
    )

# 计算年化收益率和换手率
def calc_stats(df, principal):
    """
    Summarise a trade record as (annualised return, daily turnover, days).

    Only rows with ``volume > 0`` are used. The time span runs from the first
    to the last such row; a zero span is replaced by one hour (1/24 day) so
    the divisions stay defined. The return annualises the last ``net`` value
    relative to ``principal``; turnover is traded notional per unit of
    ``principal`` per day. Returns ``(0, 0, 0)`` when no row has volume.
    """
    traded = df[df['volume'] > 0]
    if traded.empty:
        return 0, 0, 0

    first_ts, last_ts = traded['time'].iloc[0], traded['time'].iloc[-1]
    span_days = (last_ts - first_ts).total_seconds() / 86400
    if span_days == 0:
        # Guard against division by zero: treat the span as one hour.
        span_days = 1/24

    final_net = traded['net'].iloc[-1]
    notional_sum = (traded['volume'] * traded['price']).sum()
    return (
        final_net / principal / span_days * 365,
        notional_sum / principal / span_days,
        span_days,
    )

# Capital base used to normalise return and turnover.
principal = 16_000_000

# (annualised return, daily turnover, days covered) for each run.
ann_return1, turnover1, days1 = calc_stats(trade_record1, principal)
ann_return2, turnover2, days2 = calc_stats(trade_record2, principal)
ann_return3, turnover3, days3 = calc_stats(trade_record3, principal)

# Plot the three net curves on one figure; each legend entry carries that
# run's annualised return and turnover.
plt.figure(figsize=(12, 6))
plt.plot(trade_record1['time'], trade_record1['net'], label=f'Without OIR\nret: {ann_return1:.2%}\nturnover: {turnover1:.2f}', color='tab:blue')
plt.plot(trade_record2['time'], trade_record2['net'], label=f'With OIR Type1\nret: {ann_return2:.2%}\nturnover: {turnover2:.2f}', color='tab:orange')
plt.plot(trade_record3['time'], trade_record3['net'], label=f'With OIR Type2\nret: {ann_return3:.2%}\nturnover: {turnover3:.2f}', color='tab:green')
plt.xlabel('Time')
plt.ylabel('PnL')
plt.title('PnL Curve \n(Initial Capital 16000000)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-10-22/2025-10-18-2025-10-21_ETH_0.0_2000.0_5_True_False_False_0.0_0_.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/APT/25-10-22/2025-10-18-2025-10-21_APT_0.0_2000.0_5_False_False_False_0.0_0__fix_tick4_固定阈值.json')

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Path of the single JSON-lines trade record analysed in this cell.
json_path1 = '/Users/rayxu/Downloads/nuts_am/log/APT/25-10-22/2025-10-18-2025-10-21_APT_0.0_2000.0_5_False_False_False_0.0_0__fix_tick4_固定阈值.json'

# Load the data (one JSON object per line).
trade_record1 = pd.read_json(json_path1, lines=True)


# Parse 'time' to datetimes; unparseable values become NaT.
trade_record1['time'] = pd.to_datetime(trade_record1['time'], errors='coerce')


# Disabled: keep only rows before 2025-08-13 10:00.
# cutoff = pd.Timestamp('2025-08-13 10:00:00')
# trade_record1 = trade_record1[trade_record1['time'] < cutoff].copy()
# trade_record2 = trade_record2[trade_record2['time'] < cutoff].copy()
# trade_record3 = trade_record3[trade_record3['time'] < cutoff].copy()

# Signed direction: 'Maker_ask' rows are -1, every other type is +1.
# NOTE: the loop variable `df` stays bound to trade_record1 after each loop.
for df in [trade_record1]:
    df['flag'] = 1
    df.loc[df['type'] == 'Maker_ask', 'flag'] = -1

# Running signed volume.
for df in [trade_record1]:
    df['pos'] = (df['volume'] * df['flag']).cumsum()

# net = running sum of -volume*price*flag, plus pos valued at the row's price,
# plus the running sum of 0.00005 * notional.
for df in [trade_record1]:
    df['net'] = (
        (-df['volume'] * df['price'] * df['flag']).cumsum()
        + df['pos'] * df['price']
        + (df['volume'] * df['price'] * 0.00005).cumsum()
    )

# 计算年化收益率和换手率
def calc_stats(df, principal):
    """
    Summarise a trade record as (annualised return, daily turnover, days).

    Only rows with ``volume > 0`` are used. The time span runs from the first
    to the last such row; a zero span is replaced by one hour (1/24 day) so
    the divisions stay defined. The return annualises the last ``net`` value
    relative to ``principal``; turnover is traded notional per unit of
    ``principal`` per day. Returns ``(0, 0, 0)`` when no row has volume.
    """
    traded = df[df['volume'] > 0]
    if traded.empty:
        return 0, 0, 0

    first_ts, last_ts = traded['time'].iloc[0], traded['time'].iloc[-1]
    span_days = (last_ts - first_ts).total_seconds() / 86400
    if span_days == 0:
        # Guard against division by zero: treat the span as one hour.
        span_days = 1/24

    final_net = traded['net'].iloc[-1]
    notional_sum = (traded['volume'] * traded['price']).sum()
    return (
        final_net / principal / span_days * 365,
        notional_sum / principal / span_days,
        span_days,
    )

# Capital base used to normalise return and turnover.
principal = 16_000_000

# (annualised return, daily turnover, days covered) for the run.
ann_return1, turnover1, days1 = calc_stats(trade_record1, principal)


# Plot the net curve; the legend entry carries the run's annualised return
# and turnover.
plt.figure(figsize=(12, 6))
plt.plot(trade_record1['time'], trade_record1['net'], label=f'Without OIR\nret: {ann_return1:.2%}\nturnover: {turnover1:.2f}', color='tab:blue')
plt.xlabel('Time')
plt.ylabel('PnL')
plt.title('PnL Curve \n(Initial Capital 16000000)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
3831.8/3809.9-1

In [None]:
# Parameter snapshot written as `key: value` lines.
walk_by_midprice: False
interval: 100
order_limit: 100
leverage: 3
order_size: 1
order_amount: 25 # not used
sr_open_limit: 0.1 # not used
d: 0
max_pos: 2000
max_pos_ex1: 30 # not used
wait2Lock_interval: 5
IsDynamicHedgingTime: False

# 新下单算法


In [None]:
analyze_slippage_backtest("/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-07/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0__测试normal模式_滑点和开仓价差比.json")

In [None]:
analyze_slippage_backtest("/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-06/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0_f'_测试normal模式_{min_spread_limit_bid}_{min_spread_limit_ask}'.json")

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-06/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0__测试normal模式_对照组.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-06/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0__测试normal模式.json')

# 利用反转

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-10/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0__-0.00007_0.00008_朴素模式.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-10/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0__-0.00007_0.00008_UseReversal1110.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-10/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0__-0.00007_0.00008_UseReversal1110_增加0.00005的价差限制.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-13/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0__-0.00007_0.00008_UseReversal1110_限制premium最大为0.00001.json')

In [None]:
stats, sell_percentiles, buy_percentiles, df = analyze_slippage('/Users/rayxu/Downloads/order.arbitrage_eth_okx_binance_09_2 (10).csv')

In [None]:
df

# AM回测

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Path of the single JSON-lines trade record analysed in this cell.
json_path1 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-21/2025-11-01-2025-11-21_ETH_0.0_1000.0_5_False_False_False_0.0_0_获取AM曲线_11月.json'

# Load the data (one JSON object per line).
trade_record1 = pd.read_json(json_path1, lines=True)

# Parse 'time' to datetimes; unparseable values become NaT.
trade_record1['time'] = pd.to_datetime(trade_record1['time'], errors='coerce')


# Keep only rows strictly before the cutoff, 2026-08-13 10:00.
# NOTE(review): the file name suggests data for 2025-11-01..2025-11-21, so
# this presumably keeps every row with a valid time -- confirm.
cutoff = pd.Timestamp('2026-08-13 10:00:00')
trade_record1 = trade_record1[trade_record1['time'] < cutoff].copy()


# Signed direction: 'Maker_ask' rows are -1, every other type is +1.
# NOTE: the loop variable `df` stays bound to trade_record1 after each loop.
for df in [trade_record1]:
    df['flag'] = 1
    df.loc[df['type'] == 'Maker_ask', 'flag'] = -1

# Running signed volume.
for df in [trade_record1]:
    df['pos'] = (df['volume'] * df['flag']).cumsum()

# net = running sum of -volume*price*flag, plus pos valued at the row's price,
# plus the running sum of 0.00005 * notional.
for df in [trade_record1]:
    df['net'] = (
        (-df['volume'] * df['price'] * df['flag']).cumsum()
        + df['pos'] * df['price']
        + (df['volume'] * df['price'] * 0.00005).cumsum()
    )

# 计算年化收益率和换手率
def calc_stats(df, principal):
    """
    Summarise a trade record as (annualised return, daily turnover, days).

    Only rows with ``volume > 0`` are used. The time span runs from the first
    to the last such row; a zero span is replaced by one hour (1/24 day) so
    the divisions stay defined. The return annualises the last ``net`` value
    relative to ``principal``; turnover is traded notional per unit of
    ``principal`` per day. Returns ``(0, 0, 0)`` when no row has volume.
    """
    traded = df[df['volume'] > 0]
    if traded.empty:
        return 0, 0, 0

    first_ts, last_ts = traded['time'].iloc[0], traded['time'].iloc[-1]
    span_days = (last_ts - first_ts).total_seconds() / 86400
    if span_days == 0:
        # Guard against division by zero: treat the span as one hour.
        span_days = 1/24

    final_net = traded['net'].iloc[-1]
    notional_sum = (traded['volume'] * traded['price']).sum()
    return (
        final_net / principal / span_days * 365,
        notional_sum / principal / span_days,
        span_days,
    )

# Capital base used to normalise return and turnover.
principal = 2_000_000

# (annualised return, daily turnover, days covered) for the run.
ann_return1, turnover1, days1 = calc_stats(trade_record1, principal)


# Plot the net curve. The title is built from the `cutoff` and `principal`
# actually used in this cell, so it cannot drift from the data shown (it used
# to hard-code "before 2025-08-13 10:00" while the cutoff was 2026-08-13).
plt.figure(figsize=(12, 6))
plt.plot(trade_record1['time'], trade_record1['net'], label=f'daily update\nret: {ann_return1:.2%}\nturnover: {turnover1:.2f}', color='tab:blue')
plt.xlabel('Time')
plt.ylabel('PnL')
plt.title(f'PnL Curve before {cutoff:%Y-%m-%d %H:%M}\n(Initial Capital {principal})')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
df['pos'].plot()

In [None]:
df['net']

In [None]:
trade_record1

# 更长时间稳定性

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-10/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0__-0.00007_0.00008_朴素模式.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-25/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0_长期时间稳定性叠加（10s）_-0.00007_0.00008.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-25/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0_长期时间稳定性叠加（60s）_-0.00007_0.00008.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-25/2025-11-01-2025-11-20_ETH_0.0_2000.0_5_True_False_False_0.0_0_benchmark.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-25/2025-11-01-2025-11-20_ETH_0.0_2000.0_5_True_False_False_0.0_0_1min_稳定性.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-26/2025-10-01-2025-10-31_ETH_0.0_2000.0_5_True_False_False_0.0_0_95 5 benchmark.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-26/2025-10-01-2025-10-31_ETH_0.0_2000.0_5_True_False_False_0.0_0_95 5 更长时间稳定性.json')

In [None]:
from utils_Sep import *
# Build `funding_diff`: per-funding-time OKX minus Binance funding rate for one
# symbol over the 30 days ending at `last_time`.
symbol = 'ETH' 

okx_csv     = f'/Volumes/T7/Obentech/fundingRateData/okx/{symbol}-USDT-SWAP.csv'
binance_csv = f'/Volumes/T7/Obentech/fundingRateData/binance/{symbol}USDT.csv'
last_time = pd.to_datetime('2025-11-07 04:15:00')
start_time = last_time - pd.Timedelta(days=30) 
# process_funding_time_v3 comes from the utils_Sep star import; the code below
# relies on its result having 'Time', 'FundingTime' and 'FundingRate' columns.
df_okx     = process_funding_time_v3(okx_csv, 'okx')
df_binance = process_funding_time_v3(binance_csv, 'binance')

# Restrict both exchanges to the window [start_time, last_time).
df_b = df_binance[(df_binance['Time'] >= start_time) & (df_binance['Time'] < last_time)].copy()
df_o = df_okx[(df_okx['Time'] >= start_time) & (df_okx['Time'] < last_time)].copy()


# Keep the last row per FundingTime, then sum the rates over the window.
df_o = df_o.drop_duplicates(subset='FundingTime', keep='last')
sum_okx = df_o['FundingRate'].sum()
df_b = df_b.drop_duplicates(subset='FundingTime', keep='last')
sum_bnb = df_b['FundingRate'].sum()
earn    = sum_okx - sum_bnb
day_start = last_time - pd.Timedelta(days=1)

# Same difference restricted to the final day of the window.
sum_okx1 = df_o[df_o['Time'] >= day_start]['FundingRate'].sum()
sum_bnb1 = df_b[df_b['Time'] >= day_start]['FundingRate'].sum()
earn_1day = sum_okx1 - sum_bnb1

# Funding interval in whole hours, taken from the last two FundingTime rows
# (int() truncates; needs at least two rows per exchange).
funding_interval_bn = int((df_b.iloc[-1]['FundingTime'] - df_b.iloc[-2]['FundingTime']).total_seconds() / 3600)
funding_interval_okx = int((df_o.iloc[-1]['FundingTime'] - df_o.iloc[-2]['FundingTime']).total_seconds() / 3600)

df_o.rename(columns={'FundingRate': 'FundingRate_okx'}, inplace=True)
df_b.rename(columns={'FundingRate': 'FundingRate_binance'}, inplace=True)

# Align the two series on FundingTime. With equal intervals, OKX is
# left-joined onto Binance. Otherwise the exchange with the shorter interval
# is summed into right-labelled, right-closed bins of the longer interval and
# left-joined onto the other exchange.
if funding_interval_bn == funding_interval_okx:
    funding_diff = df_b[['FundingTime', 'FundingRate_binance']].set_index('FundingTime').join(df_o[['FundingTime', 'FundingRate_okx']].set_index('FundingTime'), how='left')
elif funding_interval_bn > funding_interval_okx:
    df_o_agg = df_o.set_index('FundingTime').resample(f'{funding_interval_bn}h', label='right', closed='right')['FundingRate_okx'].sum().to_frame()
    funding_diff = df_b[['FundingTime', 'FundingRate_binance']].set_index('FundingTime').join(df_o_agg, how='left')
else:
    df_b_agg = df_b.set_index('FundingTime').resample(f'{funding_interval_okx}h', label='right', closed='right')['FundingRate_binance'].sum().to_frame()
    funding_diff = df_o[['FundingTime', 'FundingRate_okx']].set_index('FundingTime').join(df_b_agg, how='left')


# OKX rate minus Binance rate at each funding time.
funding_diff['funding_diff']     = funding_diff['FundingRate_okx'] - funding_diff['FundingRate_binance']

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Paths of the two JSON-lines trade records to compare
# (benchmark vs. longer-stability run, per the file names).
json_path1 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-26/2025-10-01-2025-10-31_ETH_0.0_2000.0_5_True_False_False_0.0_0_95 5 benchmark.json'
json_path2 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-26/2025-10-01-2025-10-31_ETH_0.0_2000.0_5_True_False_False_0.0_0_95 5 更长时间稳定性.json'
# json_path3 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-08-20/2025-07-01-2025-08-19_ETH_0.0_500.0_5_True_False_False_0.0_3_3_day_update.json'
# Load the data (one JSON object per line).
trade_record1 = pd.read_json(json_path1, lines=True)
trade_record2 = pd.read_json(json_path2, lines=True)
# trade_record3 = pd.read_json(json_path3, lines=True)

# Parse 'time' to datetimes; unparseable values become NaT.
trade_record1['time'] = pd.to_datetime(trade_record1['time'], errors='coerce')
trade_record2['time'] = pd.to_datetime(trade_record2['time'], errors='coerce')
# trade_record3['time'] = pd.to_datetime(trade_record3['time'], errors='coerce')

# Keep only rows strictly before the cutoff, 2026-08-13 10:00.
# NOTE(review): the file names suggest data for 2025-10-01..2025-10-31, so
# this presumably keeps every row with a valid time -- confirm.
cutoff = pd.Timestamp('2026-08-13 10:00:00')
trade_record1 = trade_record1[trade_record1['time'] < cutoff].copy()
trade_record2 = trade_record2[trade_record2['time'] < cutoff].copy()
# trade_record3 = trade_record3[trade_record3['time'] < cutoff].copy()
# trade_record1.dropna(subset=['slippage'], inplace=True)
# trade_record2.dropna(subset=['slippage'], inplace=True)
# # Disabled: compute flag
# for df in [trade_record1, trade_record2]:
#     df['flag'] = 1
#     df.loc[df['type'] == 'Maker_ask', 'flag'] = -1

# # Disabled: compute pos
# for df in [trade_record1, trade_record2]:
#     df['pos'] = (df['volume'] * df['flag']).cumsum()

# # Disabled: compute net
# for df in [trade_record1, trade_record2]:
#     df['net'] = ((-df['volume'] * df['price'] * df['flag']).cumsum()+ df['pos'] * df['price']+ (df['volume'] * df['price'] * 0.00005).cumsum())

def process_trade_record(trade_record):
    """
    Pair each hedge row with the row it hedges and derive spread-based PnL.

    A ``flag`` column is written onto the caller's frame (-1 for
    ``'Maker_ask'`` rows, +1 otherwise). Rows with a non-null ``hedge_oid``
    are then left-merged with the row whose ``oid`` equals that
    ``hedge_oid``; the matched row's columns carry the ``_base`` suffix.

    Columns added to the returned frame:
        pos:    running sum of ``volume * flag``.
        spread: ``price - price_base``.
        RSR:    ``spread / price_base``.
        net:    running sum of ``flag * volume * spread``, plus
                ``pos * spread``, plus the running sum of
                ``volume * price * 0.0001``.
    """
    # +1 everywhere except 'Maker_ask' rows, which become -1 (int64 column).
    is_ask = trade_record['type'] == 'Maker_ask'
    trade_record['flag'] = 1 - 2 * is_ask.astype('int64')

    hedge_rows = trade_record.loc[trade_record['hedge_oid'].notna()].copy()
    base_rows = trade_record[['oid', 'price', 'volume']]
    paired = hedge_rows.merge(
        base_rows,
        how='left',
        left_on='hedge_oid',
        right_on='oid',
        suffixes=('', '_base'),
    )

    signed_volume = paired['volume'] * paired['flag']
    paired['pos'] = signed_volume.cumsum()
    paired['spread'] = paired['price'] - paired['price_base']
    paired['RSR'] = paired['spread'] / paired['price_base']

    spread_pnl = (paired['flag'] * paired['volume'] * paired['spread']).cumsum()
    open_pnl = paired['pos'] * paired['spread']
    notional_credit = (paired['volume'] * paired['price'] * 0.0001).cumsum()
    paired['net'] = spread_pnl + open_pnl + notional_credit
    return paired
# 计算年化收益率和换手率

def calc_stats(df, principal,target_column='net'):
    """
    Summarise a trade record as (annualised return, daily turnover, days).

    Only rows with ``volume > 0`` are used. The time span runs from the first
    to the last such row; a zero span is replaced by one hour (1/24 day) so
    the divisions stay defined. The return annualises the last value of
    ``target_column`` relative to ``principal``; turnover is traded notional
    per unit of ``principal`` per day. Returns ``(0, 0, 0)`` when no row has
    volume.
    """
    traded = df[df['volume'] > 0]
    if traded.empty:
        return 0, 0, 0

    first_ts, last_ts = traded['time'].iloc[0], traded['time'].iloc[-1]
    span_days = (last_ts - first_ts).total_seconds() / 86400
    if span_days == 0:
        # Guard against division by zero: treat the span as one hour.
        span_days = 1/24

    final_value = traded[target_column].iloc[-1]
    notional_sum = (traded['volume'] * traded['price']).sum()
    return (
        final_value / principal / span_days * 365,
        notional_sum / principal / span_days,
        span_days,
    )

# Capital base used to normalise return and turnover (4.5 million).
principal = 4_500_000

# Pair hedge rows with the rows they hedge and derive spread-based pos / net.
trade_record1 = process_trade_record(trade_record1)
trade_record2 = process_trade_record(trade_record2)

# Add cumulative funding PnL. `total_pnl` and `funding_diff` are defined in
# other cells of this notebook and must have been run first.
trade_record1, df1 = total_pnl(trade_record1,funding_diff)
trade_record2, df2 = total_pnl(trade_record2,funding_diff)

# Stats on the trading-only curve ('net') ...
ann_return1, turnover1, days1 = calc_stats(trade_record1, principal)
ann_return2, turnover2, days2 = calc_stats(trade_record2, principal)

# ... and on the trading + funding curve ('total_pnl').
total_ann_return1, _, _ = calc_stats(trade_record1, principal,target_column='total_pnl')
total_ann_return2, _, _ = calc_stats(trade_record2, principal,target_column='total_pnl')
# ann_return3, turnover3, days3 = calc_stats(trade_record3, principal)

# Total PnL, plotted only at rows where the position is flat (pos == 0).
flat1 = trade_record1[trade_record1['pos'] == 0]
flat2 = trade_record2[trade_record2['pos'] == 0]

plt.figure(figsize=(12, 6))
plt.plot(flat1['time'], flat1['total_pnl'], label=f'BenchMark\nret: {total_ann_return1:.2%} turnover: {turnover1:.2f}', color='tab:blue')
plt.plot(flat2['time'], flat2['total_pnl'], label=f'Longer Stability\nret: {total_ann_return2:.2%} turnover: {turnover2:.2f}', color='tab:orange')
# plt.plot(trade_record3['time'], trade_record3['net'], label=f'3 day update\nret: {ann_return3:.2%}\nturnover: {turnover3:.2f}', color='tab:green')
plt.xlabel('Time')
plt.ylabel('PnL')
# The title is built from the `cutoff` and `principal` actually used in this
# cell; it used to hard-code a 2025-08-13 cutoff and a capital of 2000000.
plt.title(f'PnL Curve before {cutoff:%Y-%m-%d %H:%M}\n(Initial Capital {principal})')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


# Funding PnL only: annualised from the final cumulative funding PnL.

funding_ret1 = df1['cum_fr_pnl'].iloc[-1]/principal/days1*365
funding_ret2 = df2['cum_fr_pnl'].iloc[-1]/principal/days2*365

plt.figure(figsize=(12, 6))
plt.plot(df1['FundingTime'], df1['cum_fr_pnl'], label=f'BenchMark\nret: {funding_ret1:.2%}', color='tab:blue')
plt.plot(df2['FundingTime'], df2['cum_fr_pnl'], label=f'Longer Stability\nret: {funding_ret2:.2%}', color='tab:orange')
plt.xlabel('Time')
plt.ylabel('PnL')
plt.title('Funding PnL Curve')
plt.legend()

In [None]:
df

In [None]:
def total_pnl(trade_record,funding_diff):
    """Add cumulative funding PnL to a trade record.

    Args:
        trade_record: DataFrame with at least ``time``, ``pos``, ``price`` and
            ``net`` columns. It does not need to be pre-sorted.
        funding_diff: DataFrame indexed by ``FundingTime`` with a
            ``funding_diff`` column.

    Returns:
        ``(final_result, df)`` where ``final_result`` is the time-sorted trade
        record with ``FundingTime``, ``cum_fr_pnl`` and ``total_pnl``
        (``net + cum_fr_pnl``) added, and ``df`` has one row per funding event
        inside the trading window with ``fr_pnl`` / ``cum_fr_pnl``.
        Rows of ``final_result`` containing NaN in any column are dropped; this
        includes trades that precede the first funding event in the window.
    """
    # Sort first: the window bounds below are read from the first/last row, so
    # they are only the earliest/latest trade times once the frame is ordered.
    trade_record = trade_record.sort_values('time')
    first_trade_time = trade_record['time'].iloc[0]
    last_trade_time = trade_record['time'].iloc[-1]

    funding_diff = funding_diff.sort_values('FundingTime')
    in_window = (funding_diff.index <= last_trade_time) & (funding_diff.index >= first_trade_time)
    funding_diff = funding_diff[in_window]

    # For each funding event, pick up the position/price of the latest trade
    # at or before it.
    df = pd.merge_asof(
        funding_diff.reset_index(),
        trade_record[['time', 'pos','price']],
        left_on='FundingTime',
        right_on='time',
        direction='backward'
    )

    df['fr_pnl'] = df['funding_diff'] * df['pos'] * df['price']
    df['cum_fr_pnl'] = df['fr_pnl'].cumsum()

    # For each trade, pick up the funding PnL accumulated up to that time.
    final_result = pd.merge_asof(trade_record,df[['FundingTime','cum_fr_pnl']],left_on='time',right_on='FundingTime', direction='backward')

    final_result = final_result.dropna()
    final_result['total_pnl'] = final_result['net'] + final_result['cum_fr_pnl']
    return final_result, df

In [None]:
# Scratch version of the funding-PnL computation, run directly on trade_record1.
# NOTE(review): this rebinds the global `funding_diff` to the filtered window,
# so later cells see only the rows inside trade_record1's time range.
funding_diff = funding_diff.sort_values('FundingTime')
# NOTE(review): the bounds are read from the first/last row before trade_record1
# is sorted on the next line — presumably the record is already chronological.
funding_diff = funding_diff[(funding_diff.index <= trade_record1['time'].iloc[-1]) & (funding_diff.index >= trade_record1['time'].iloc[0])]
trade_record1 = trade_record1.sort_values('time')

# For each funding event, take pos/price of the latest trade at or before it.
df = pd.merge_asof(
    funding_diff.reset_index(),
    trade_record1[['time', 'pos','price']],
    left_on='FundingTime',
    right_on='time',
    direction='backward'
)

df['fr_pnl'] = df['funding_diff'] * df['pos'] * df['price']
df['cum_fr_pnl'] = df['fr_pnl'].cumsum()

# For each trade, take the funding PnL accumulated up to that time.
final_result = pd.merge_asof(trade_record1,df[['FundingTime','cum_fr_pnl']],left_on='time',right_on='FundingTime', direction='backward')

# Drops every row with a NaN in any column, including trades that precede the
# first funding event (their cum_fr_pnl is NaN).
final_result = final_result.dropna()
final_result['total_pnl'] = final_result['net'] + final_result['cum_fr_pnl']

In [None]:
funding_diff

In [None]:
trade_record1

In [None]:
(trade_record1['volume'] * trade_record1['price'] * 0.00005).cumsum()

In [None]:
trade_record1.head(30)

In [None]:
trade_record1['pos'].head(30).plot()

In [None]:
json_path1 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-25/2025-11-01-2025-11-20_ETH_0.0_2000.0_5_True_False_False_0.0_0_benchmark.json'
trade_record1 = pd.read_json(json_path1, lines=True)
trade_record1['flag'] = 1
trade_record1.loc[trade_record1['type'] == 'Maker_ask', 'flag'] = -1

In [None]:
trade_record1

In [None]:
# Rows that reference another order through hedge_oid, joined with the
# price/volume of the referenced order (columns suffixed with `_base`).
df_h = trade_record1[trade_record1['hedge_oid'].notna()].copy()
df_base = trade_record1[['oid', 'price', 'volume']]
df_merge = df_h.merge(df_base,left_on='hedge_oid',right_on='oid',how='left',suffixes=('', '_base'))


# Price difference between the hedge row and its referenced order, absolute and
# relative to the referenced order's price.
df_merge['spread'] = df_merge['price'] - df_merge['price_base']
df_merge['RSR'] = df_merge['spread'] / df_merge['price_base']
# df_merge['pos'] = (-df_merge['volume'] * df_merge['flag']).cumsum()
# if IsOkxRebate:
#     trades_2['fee'] = trades_2['Order2FilledPrice'] * trades_2['Order2FilledAmount'] *(-0.00005-0.00005)
# else:
#     trades_2['fee'] = trades_2['Order2FilledPrice'] * trades_2['Order2FilledAmount'] *(-0.00005)
# trades_2['fee'] = trades_2['fee'].astype(float)
# `pos` is not recomputed here (the line above is commented out), so this uses
# whatever `pos` column trade_record1 already carries.
# net = cumulative flag*volume*spread + pos*spread + cumulative 0.0001 * notional.
df_merge['net'] = (df_merge['flag'] * df_merge['volume'] * df_merge['spread']).cumsum() + df_merge['pos'] * df_merge['spread'] + (df_merge['volume'] * df_merge['price'] * 0.0001).cumsum()
df_merge

In [None]:
# Per-pair PnL: one row per hedge order, joined with its referenced (base) order.
df_h = trade_record1[trade_record1['hedge_oid'].notna()].copy()

df_base = trade_record1[['oid', 'price', 'volume', 'flag']].rename(
    columns={'price': 'price_base', 'volume': 'volume_base', 'flag': 'flag_base'}
)

# Attach the price and volume of the base order each hedge row refers to.
df_merge = df_h.merge(df_base, left_on='hedge_oid', right_on='oid', how='left')

# direction = base flag
df_merge['direction'] = df_merge['flag_base']

# Price spread between hedge and base.
df_merge['spread'] = df_merge['price'] - df_merge['price_base']

# PnL of a single hedge pair.
df_merge['pnl'] = df_merge['direction'] * df_merge['spread'] * df_merge['volume_base']

# Fee terms (both are negative numbers: notional * -0.00005).
df_merge['fee_base']  = df_merge['price_base'] * df_merge['volume_base'] * -0.00005
df_merge['fee_hedge'] = df_merge['price']      * df_merge['volume_base'] * -0.00005

# NOTE(review): subtracting the negative fee terms increases pnl_net — confirm
# this sign is intended.
df_merge['pnl_net'] = df_merge['pnl'] - df_merge['fee_base'] - df_merge['fee_hedge']


In [None]:
df_merge[df_merge['pos'] == 0]['net'].plot()

In [None]:
df_merge['net'].plot()

In [None]:
trade_record1.dropna(subset=['slippage'], inplace=True)
trade_record1['pos'] = (trade_record1['volume'] * trade_record1['flag']).cumsum()

In [None]:
trade_record1['pos'].plot()

# 两种模式的资金费率

In [None]:
from utils_Sep import *
# Build `funding_diff`: OKX minus Binance funding rate per funding timestamp,
# over the 60 days ending at `last_time`.
symbol = 'ETH' 

okx_csv     = f'/Volumes/T7/Obentech/fundingRateData/okx/{symbol}-USDT-SWAP.csv'
binance_csv = f'/Volumes/T7/Obentech/fundingRateData/binance/{symbol}USDT.csv'
last_time = pd.to_datetime('2025-11-26 04:15:00')
start_time = last_time - pd.Timedelta(days=60) 
# process_funding_time_v3 is defined outside this cell (presumably in utils_Sep);
# the code below relies on it returning Time, FundingTime and FundingRate columns.
df_okx     = process_funding_time_v3(okx_csv, 'okx')
df_binance = process_funding_time_v3(binance_csv, 'binance')

# Restrict both exchanges to [start_time, last_time).
df_b = df_binance[(df_binance['Time'] >= start_time) & (df_binance['Time'] < last_time)].copy()
df_o = df_okx[(df_okx['Time'] >= start_time) & (df_okx['Time'] < last_time)].copy()


# Keep one row per funding timestamp (the last one) and total the rates.
df_o = df_o.drop_duplicates(subset='FundingTime', keep='last')
sum_okx = df_o['FundingRate'].sum()
df_b = df_b.drop_duplicates(subset='FundingTime', keep='last')
sum_bnb = df_b['FundingRate'].sum()
earn    = sum_okx - sum_bnb
day_start = last_time - pd.Timedelta(days=1)

# Same totals restricted to the final day of the window.
sum_okx1 = df_o[df_o['Time'] >= day_start]['FundingRate'].sum()
sum_bnb1 = df_b[df_b['Time'] >= day_start]['FundingRate'].sum()
earn_1day = sum_okx1 - sum_bnb1

# Funding interval in whole hours, taken from the last two funding timestamps
# of each exchange.
funding_interval_bn = int((df_b.iloc[-1]['FundingTime'] - df_b.iloc[-2]['FundingTime']).total_seconds() / 3600)
funding_interval_okx = int((df_o.iloc[-1]['FundingTime'] - df_o.iloc[-2]['FundingTime']).total_seconds() / 3600)

df_o.rename(columns={'FundingRate': 'FundingRate_okx'}, inplace=True)
df_b.rename(columns={'FundingRate': 'FundingRate_binance'}, inplace=True)

# Align the two series on FundingTime. When the intervals differ, the exchange
# with the shorter interval is summed up into the longer interval's buckets
# (right-labelled, right-closed) before the join.
if funding_interval_bn == funding_interval_okx:
    funding_diff = df_b[['FundingTime', 'FundingRate_binance']].set_index('FundingTime').join(df_o[['FundingTime', 'FundingRate_okx']].set_index('FundingTime'), how='left')
elif funding_interval_bn > funding_interval_okx:
    df_o_agg = df_o.set_index('FundingTime').resample(f'{funding_interval_bn}h', label='right', closed='right')['FundingRate_okx'].sum().to_frame()
    funding_diff = df_b[['FundingTime', 'FundingRate_binance']].set_index('FundingTime').join(df_o_agg, how='left')
else:
    df_b_agg = df_b.set_index('FundingTime').resample(f'{funding_interval_okx}h', label='right', closed='right')['FundingRate_binance'].sum().to_frame()
    funding_diff = df_o[['FundingTime', 'FundingRate_okx']].set_index('FundingTime').join(df_b_agg, how='left')


funding_diff['funding_diff']     = funding_diff['FundingRate_okx'] - funding_diff['FundingRate_binance']

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Paths of the JSON-lines trade logs to compare.
json_path1 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-25/2025-11-01-2025-11-20_ETH_0.0_2000.0_5_False_False_False_0.0_0_-0.0001-0.00005.json'
json_path2 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-25/2025-11-01-2025-11-20_ETH_0.0_2000.0_5_False_False_False_0.0_0_-0.00008-0.00007.json'
json_path3 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-25/2025-11-01-2025-11-20_ETH_0.0_2000.0_5_True_False_False_0.0_0_benchmark.json'
# Load the data (one JSON object per line).
trade_record1 = pd.read_json(json_path1, lines=True)
trade_record2 = pd.read_json(json_path2, lines=True)
trade_record3 = pd.read_json(json_path3, lines=True)

# Normalise the time column to datetimes; unparseable values become NaT.
trade_record1['time'] = pd.to_datetime(trade_record1['time'], errors='coerce')
trade_record2['time'] = pd.to_datetime(trade_record2['time'], errors='coerce')
trade_record3['time'] = pd.to_datetime(trade_record3['time'], errors='coerce')

# Keep only data before 8/13 10:00 (disabled).
# cutoff = pd.Timestamp('2026-08-13 10:00:00')
# trade_record1 = trade_record1[trade_record1['time'] < cutoff].copy()
# trade_record2 = trade_record2[trade_record2['time'] < cutoff].copy()
# trade_record3 = trade_record3[trade_record3['time'] < cutoff].copy()
# trade_record1.dropna(subset=['slippage'], inplace=True)
# trade_record2.dropna(subset=['slippage'], inplace=True)
# # Compute flag
# for df in [trade_record1, trade_record2]:
#     df['flag'] = 1
#     df.loc[df['type'] == 'Maker_ask', 'flag'] = -1

# # Compute pos
# for df in [trade_record1, trade_record2]:
#     df['pos'] = (df['volume'] * df['flag']).cumsum()

# # Compute net
# for df in [trade_record1, trade_record2]:
#     df['net'] = ((-df['volume'] * df['price'] * df['flag']).cumsum()+ df['pos'] * df['price']+ (df['volume'] * df['price'] * 0.00005).cumsum())

def process_trade_record(trade_record, rebate_rate=0.0001):
    """Pair hedge orders with the orders they reference and compute running PnL.

    The input frame is not modified; all work happens on a copy.

    Args:
        trade_record: DataFrame with ``oid``, ``price``, ``volume``, ``type``
            and ``hedge_oid`` columns.
        rebate_rate: Rate applied to each hedge row's notional
            (``volume * price``) and *added* cumulatively to ``net``.
            Defaults to the previously hard-coded 0.0001.
            NOTE(review): presumably a maker rebate — confirm the sign.

    Returns:
        DataFrame with one row per hedge order (rows with non-null
        ``hedge_oid``), joined with the referenced order's columns (suffix
        ``_base``) and with these columns added:
        ``flag`` (-1 for type ``'Maker_ask'``, otherwise 1),
        ``pos`` (cumulative ``volume * flag``, overwriting any existing ``pos``),
        ``spread`` (``price - price_base``), ``RSR`` (``spread / price_base``)
        and ``net``. Rows with NaN in any column are dropped.
    """
    # Work on a copy so the caller's frame does not silently gain a `flag` column.
    records = trade_record.copy()
    records['flag'] = 1
    records.loc[records['type'] == 'Maker_ask', 'flag'] = -1

    df_h = records[records['hedge_oid'].notna()]
    df_base = records[['oid', 'price', 'volume']]
    df_merge = df_h.merge(df_base,left_on='hedge_oid',right_on='oid',how='left',suffixes=('', '_base'))

    df_merge['pos'] = (df_merge['volume'] * df_merge['flag']).cumsum()
    df_merge['spread'] = df_merge['price'] - df_merge['price_base']
    df_merge['RSR'] = df_merge['spread'] / df_merge['price_base']
    # net = cumulative flag*volume*spread + pos*spread + cumulative rate * notional.
    df_merge['net'] = (df_merge['flag'] * df_merge['volume'] * df_merge['spread']).cumsum() + df_merge['pos'] * df_merge['spread'] + (df_merge['volume'] * df_merge['price'] * rebate_rate).cumsum()
    df_merge.dropna(inplace=True)
    return df_merge
# Annualised return and daily turnover.

def calc_stats(df, principal,target_column='net'):
    """Return ``(annualised_return, daily_turnover, days)`` for a trade record.

    Only rows with ``volume > 0`` are considered. ``days`` is the span between
    the first and last remaining row's ``time``; a zero span is replaced by one
    hour (1/24 day) to avoid dividing by zero. The PnL is the last value of
    ``target_column``. An empty selection yields ``(0, 0, 0)``.
    """
    traded = df.loc[df['volume'] > 0].copy()
    if traded.empty:
        return 0, 0, 0

    timestamps = traded['time']
    elapsed = timestamps.iloc[-1] - timestamps.iloc[0]
    days = elapsed.total_seconds() / 86400
    if days == 0:
        days = 1/24  # guard against division by zero

    final_pnl = traded[target_column].iloc[-1]
    notional = (traded['volume'] * traded['price']).sum()

    ann_return = final_pnl / principal / days * 365
    turnover = notional / principal / days
    return ann_return, turnover, days

# Capital base: every annualised return / turnover figure below is divided by it.
principal = 45_00_000
trade_record1 = process_trade_record(trade_record1)
trade_record2 = process_trade_record(trade_record2)
trade_record3 = process_trade_record(trade_record3)



# Attach cumulative funding PnL; df1..df3 hold the per-funding-event breakdown.
trade_record1, df1 = total_pnl(trade_record1,funding_diff)
trade_record2, df2 = total_pnl(trade_record2,funding_diff)
trade_record3, df3 = total_pnl(trade_record3,funding_diff)

ann_return1, turnover1, days1 = calc_stats(trade_record1, principal)
ann_return2, turnover2, days2 = calc_stats(trade_record2, principal)
ann_return3, turnover3, days3 = calc_stats(trade_record3, principal)

total_ann_return1, _, _ = calc_stats(trade_record1, principal,target_column='total_pnl')
total_ann_return2, _, _ = calc_stats(trade_record2, principal,target_column='total_pnl')
total_ann_return3, _, _ = calc_stats(trade_record3, principal,target_column='total_pnl')

# Plot total PnL, sampled only at rows where the position is flat (pos == 0).
plt.figure(figsize=(12, 6))
plt.plot(trade_record1[trade_record1['pos'] == 0]['time'], trade_record1[trade_record1['pos'] == 0]['total_pnl'], label=f'-0.0001-0.00005\nret: {total_ann_return1:.2%} turnover: {turnover1:.2f}', color='tab:blue')
plt.plot(trade_record2[trade_record2['pos'] == 0]['time'], trade_record2[trade_record2['pos'] == 0]['total_pnl'], label=f'-0.00008-0.00007\nret: {total_ann_return2:.2%} turnover: {turnover2:.2f}', color='tab:orange')
plt.plot(trade_record3[trade_record3['pos'] == 0]['time'], trade_record3[trade_record3['pos'] == 0]['total_pnl'], label=f'95% 5%\nret: {total_ann_return3:.2%} turnover: {turnover3:.2f}', color='tab:green')
plt.xlabel('Time')
plt.ylabel('PnL')
# No time cutoff is applied in this cell, so the title carries no date; the
# capital is taken from `principal` so it matches the legend's returns.
plt.title(f'PnL Curve\n(Initial Capital {principal})')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


# Plot cumulative funding PnL, annualised over each record's trading span.

funding_ret1 = df1['cum_fr_pnl'].iloc[-1]/principal/days1*365
funding_ret2 = df2['cum_fr_pnl'].iloc[-1]/principal/days2*365
funding_ret3 = df3['cum_fr_pnl'].iloc[-1]/principal/days3*365

# Legend names match the PnL plot above so the same series reads the same.
plt.figure(figsize=(12, 6))
plt.plot(df1['FundingTime'], df1['cum_fr_pnl'], label=f'-0.0001-0.00005\nret: {funding_ret1:.2%}', color='tab:blue')
plt.plot(df2['FundingTime'], df2['cum_fr_pnl'], label=f'-0.00008-0.00007\nret: {funding_ret2:.2%}', color='tab:orange')
plt.plot(df3['FundingTime'], df3['cum_fr_pnl'], label=f'95% 5%\nret: {funding_ret3:.2%}', color='tab:green')
plt.xlabel('Time')
plt.ylabel('PnL')
plt.title('Funding PnL Curve')
plt.legend()

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-26/2025-10-15-2025-11-25_ETH_0.0_2000.0_5_False_False_False_0.0_0_-0.0001-0.00005.json')

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Paths of the JSON-lines trade logs to compare.
json_path1 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-26/2025-10-15-2025-11-25_ETH_0.0_2000.0_5_False_False_False_0.0_0_-0.0001-0.00005.json'
json_path2 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-26/2025-10-15-2025-11-25_ETH_0.0_2000.0_5_False_False_False_0.0_0_-0.00008-0.00007.json'
json_path3 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-26/2025-10-15-2025-11-25_ETH_0.0_2000.0_5_False_False_False_0.0_0_-0.00015-0.00005.json'
json_path4 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-26/2025-10-15-2025-11-25_ETH_0.0_2000.0_5_True_False_False_0.0_0_95% 5%.json'

# Load the data (one JSON object per line).
trade_record1 = pd.read_json(json_path1, lines=True)
trade_record2 = pd.read_json(json_path2, lines=True)
trade_record3 = pd.read_json(json_path3, lines=True)
trade_record4 = pd.read_json(json_path4, lines=True)


# Keep only data before 8/13 10:00 (disabled).
# cutoff = pd.Timestamp('2026-08-13 10:00:00')
# trade_record1 = trade_record1[trade_record1['time'] < cutoff].copy()
# trade_record2 = trade_record2[trade_record2['time'] < cutoff].copy()
# trade_record3 = trade_record3[trade_record3['time'] < cutoff].copy()
# trade_record1.dropna(subset=['slippage'], inplace=True)
# trade_record2.dropna(subset=['slippage'], inplace=True)

def process_trade_record(trade_record, rebate_rate=0.0001):
    """Pair hedge orders with the orders they reference and compute running PnL.

    The input frame is not modified; all work happens on a copy.

    Args:
        trade_record: DataFrame with ``time``, ``oid``, ``price``, ``volume``,
            ``type`` and ``hedge_oid`` columns.
        rebate_rate: Rate applied to each hedge row's notional
            (``volume * price``) and *added* cumulatively to ``net``.
            Defaults to the previously hard-coded 0.0001.
            NOTE(review): presumably a maker rebate — confirm the sign.

    Returns:
        DataFrame with one row per hedge order (rows with non-null
        ``hedge_oid``), joined with the referenced order's columns (suffix
        ``_base``), with ``time`` parsed to datetimes (unparseable values become
        NaT) and with these columns added:
        ``flag`` (-1 for type ``'Maker_ask'``, otherwise 1),
        ``pos`` (cumulative ``volume * flag``, overwriting any existing ``pos``),
        ``spread`` (``price - price_base``), ``RSR`` (``spread / price_base``)
        and ``net``. Rows with NaN in any column are dropped.
    """
    # Work on a copy so the caller's frame keeps its original `time` values and
    # does not silently gain a `flag` column.
    records = trade_record.copy()
    records['time'] = pd.to_datetime(records['time'], errors='coerce')
    records['flag'] = 1
    records.loc[records['type'] == 'Maker_ask', 'flag'] = -1

    df_h = records[records['hedge_oid'].notna()]
    df_base = records[['oid', 'price', 'volume']]
    df_merge = df_h.merge(df_base,left_on='hedge_oid',right_on='oid',how='left',suffixes=('', '_base'))

    df_merge['pos'] = (df_merge['volume'] * df_merge['flag']).cumsum()
    df_merge['spread'] = df_merge['price'] - df_merge['price_base']
    df_merge['RSR'] = df_merge['spread'] / df_merge['price_base']
    # net = cumulative flag*volume*spread + pos*spread + cumulative rate * notional.
    df_merge['net'] = (df_merge['flag'] * df_merge['volume'] * df_merge['spread']).cumsum() + df_merge['pos'] * df_merge['spread'] + (df_merge['volume'] * df_merge['price'] * rebate_rate).cumsum()
    df_merge.dropna(inplace=True)
    return df_merge
# Annualised return and daily turnover.

def calc_stats(df, principal,target_column='net'):
    """Return ``(annualised_return, daily_turnover, days)`` for a trade record.

    Only rows with ``volume > 0`` are considered. ``days`` is the span between
    the first and last remaining row's ``time``; a zero span is replaced by one
    hour (1/24 day) to avoid dividing by zero. The PnL is the last value of
    ``target_column``. An empty selection yields ``(0, 0, 0)``.
    """
    traded = df.loc[df['volume'] > 0].copy()
    if traded.empty:
        return 0, 0, 0

    timestamps = traded['time']
    elapsed = timestamps.iloc[-1] - timestamps.iloc[0]
    days = elapsed.total_seconds() / 86400
    if days == 0:
        days = 1/24  # guard against division by zero

    final_pnl = traded[target_column].iloc[-1]
    notional = (traded['volume'] * traded['price']).sum()

    ann_return = final_pnl / principal / days * 365
    turnover = notional / principal / days
    return ann_return, turnover, days

def total_pnl(trade_record,funding_diff):
    """Add cumulative funding PnL to a trade record.

    Args:
        trade_record: DataFrame with at least ``time``, ``pos``, ``price`` and
            ``net`` columns. It does not need to be pre-sorted.
        funding_diff: DataFrame indexed by ``FundingTime`` with a
            ``funding_diff`` column.

    Returns:
        ``(final_result, df)`` where ``final_result`` is the time-sorted trade
        record with ``FundingTime``, ``cum_fr_pnl`` and ``total_pnl``
        (``net + cum_fr_pnl``) added, and ``df`` has one row per funding event
        inside the trading window with ``fr_pnl`` / ``cum_fr_pnl``.
        Rows of ``final_result`` containing NaN in any column are dropped; this
        includes trades that precede the first funding event in the window.
    """
    # Sort first: the window bounds below are read from the first/last row, so
    # they are only the earliest/latest trade times once the frame is ordered.
    trade_record = trade_record.sort_values('time')
    first_trade_time = trade_record['time'].iloc[0]
    last_trade_time = trade_record['time'].iloc[-1]

    funding_diff = funding_diff.sort_values('FundingTime')
    in_window = (funding_diff.index <= last_trade_time) & (funding_diff.index >= first_trade_time)
    funding_diff = funding_diff[in_window]

    # For each funding event, pick up the position/price of the latest trade
    # at or before it.
    df = pd.merge_asof(
        funding_diff.reset_index(),
        trade_record[['time', 'pos','price']],
        left_on='FundingTime',
        right_on='time',
        direction='backward'
    )

    df['fr_pnl'] = df['funding_diff'] * df['pos'] * df['price']
    df['cum_fr_pnl'] = df['fr_pnl'].cumsum()

    # For each trade, pick up the funding PnL accumulated up to that time.
    final_result = pd.merge_asof(trade_record,df[['FundingTime','cum_fr_pnl']],left_on='time',right_on='FundingTime', direction='backward')

    final_result = final_result.dropna()
    final_result['total_pnl'] = final_result['net'] + final_result['cum_fr_pnl']
    return final_result, df

# Capital base: every annualised return / turnover figure below is divided by it.
principal = 45_00_000
trade_record1 = process_trade_record(trade_record1)
trade_record2 = process_trade_record(trade_record2)
trade_record3 = process_trade_record(trade_record3)
trade_record4 = process_trade_record(trade_record4)


# Attach cumulative funding PnL; df1..df4 hold the per-funding-event breakdown.
trade_record1, df1 = total_pnl(trade_record1,funding_diff)
trade_record2, df2 = total_pnl(trade_record2,funding_diff)
trade_record3, df3 = total_pnl(trade_record3,funding_diff)
trade_record4, df4 = total_pnl(trade_record4,funding_diff)

ann_return1, turnover1, days1 = calc_stats(trade_record1, principal)
ann_return2, turnover2, days2 = calc_stats(trade_record2, principal)
ann_return3, turnover3, days3 = calc_stats(trade_record3, principal)
ann_return4, turnover4, days4 = calc_stats(trade_record4, principal)

total_ann_return1, _, _ = calc_stats(trade_record1, principal,target_column='total_pnl')
total_ann_return2, _, _ = calc_stats(trade_record2, principal,target_column='total_pnl')
total_ann_return3, _, _ = calc_stats(trade_record3, principal,target_column='total_pnl')
total_ann_return4, _, _ = calc_stats(trade_record4, principal,target_column='total_pnl')

# Plot total PnL, sampled only at rows where the position is flat (pos == 0).
plt.figure(figsize=(12, 6))
plt.plot(trade_record1[trade_record1['pos'] == 0]['time'], trade_record1[trade_record1['pos'] == 0]['total_pnl'], label=f'-0.0001-0.00005\nret: {total_ann_return1:.2%} turnover: {turnover1:.2f}', color='tab:blue')
plt.plot(trade_record2[trade_record2['pos'] == 0]['time'], trade_record2[trade_record2['pos'] == 0]['total_pnl'], label=f'-0.00008-0.00007\nret: {total_ann_return2:.2%} turnover: {turnover2:.2f}', color='tab:orange')
plt.plot(trade_record3[trade_record3['pos'] == 0]['time'], trade_record3[trade_record3['pos'] == 0]['total_pnl'], label=f'-0.00015-0.00005\nret: {total_ann_return3:.2%} turnover: {turnover3:.2f}', color='tab:green')
plt.plot(trade_record4[trade_record4['pos'] == 0]['time'], trade_record4[trade_record4['pos'] == 0]['total_pnl'], label=f'95% 5%\nret: {total_ann_return4:.2%} turnover: {turnover4:.2f}', color='tab:red')

plt.xlabel('Time')
plt.ylabel('PnL')
# No time cutoff is applied in this cell, so the title carries no date; the
# capital is taken from `principal` so it matches the legend's returns.
plt.title(f'PnL Curve\n(Initial Capital {principal})')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


# Plot cumulative funding PnL, annualised over each record's trading span.

funding_ret1 = df1['cum_fr_pnl'].iloc[-1]/principal/days1*365
funding_ret2 = df2['cum_fr_pnl'].iloc[-1]/principal/days2*365
funding_ret3 = df3['cum_fr_pnl'].iloc[-1]/principal/days3*365
funding_ret4 = df4['cum_fr_pnl'].iloc[-1]/principal/days4*365

# Each series keeps the colour it has in the PnL plot above; series 4 is red
# so it can be told apart from series 3 (green).
plt.figure(figsize=(12, 6))
plt.plot(df1['FundingTime'], df1['cum_fr_pnl'], label=f'-0.0001-0.00005\nret: {funding_ret1:.2%}', color='tab:blue')
plt.plot(df2['FundingTime'], df2['cum_fr_pnl'], label=f'-0.00008-0.00007\nret: {funding_ret2:.2%}', color='tab:orange')
plt.plot(df3['FundingTime'], df3['cum_fr_pnl'], label=f'-0.00015-0.00005\nret: {funding_ret3:.2%}', color='tab:green')
plt.plot(df4['FundingTime'], df4['cum_fr_pnl'], label=f'95% 5%\nret: {funding_ret4:.2%}', color='tab:red')
plt.xlabel('Time')
plt.ylabel('PnL')
plt.title('Funding PnL Curve')
plt.legend()

# resilience


In [None]:
# df1 = pd.read_json('/Users/rayxu/Downloads/nuts_am/log/ETH/25-12-04/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0_-0.00007_0.00008_resilience_0.00015)在next里改)修正了check_signal版本)增加了取消订单)修正了取消订单的bug.json', lines=True)

# df1[df1['time']>='2025-11-02 02:03:23.894000'].head(20)

analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-12-08/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0_resilience改变对冲时间0.0005.json')

In [None]:
# df1 = pd.read_json('/Users/rayxu/Downloads/nuts_am/log/ETH/25-12-04/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0_-0.00007_0.00008_朴素)在next里改)修正了check_signal版本.json', lines=True)

# df1[df1['time']>='2025-11-02 02:03:23.894000'].head(20)
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-12-04/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0_-0.00007_0.00008_朴素)在next里改)修正了check_signal版本.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-26/2025-10-15-2025-11-25_ETH_0.0_2000.0_5_True_False_False_0.0_0_95% 5%.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-12-05/2025-10-15-2025-11-25_ETH_0.0_2000.0_5_True_False_False_0.0_0_resilience_0.00015)在next里改)修正了check_signal版本)增加了取消订单)只用ex1的信号.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-12-05/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0_-0.00007_0.00008_resilience_0.00015)在next里改)修正了check_signal版本)增加了取消订单)只用ex0的信号.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-12-05/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0_resilience_0.001)-0.00007_0.00008在next里改)修正了check_signal版本)增加了取消订单)只用ex1的信号)增加print.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-12-05/2025-11-01-2025-11-05_1764978282.133318.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-12-04/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0_-0.00007_0.00008_朴素)在next里改)修正了check_signal版本.json')

In [None]:
df1 = pd.read_json('/Users/rayxu/Downloads/nuts_am/log/ETH/25-12-04/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0_-0.00007_0.00008_朴素)在next里改）.json',lines = True)
df2 = pd.read_json('/Users/rayxu/Downloads/nuts_am/log/ETH/25-12-04/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0_-0.00007_0.00008_resilience_0.00015)在next里改）.json',lines = True)

In [None]:
# Build a per-row key (time + price + trade type) to compare the two files.
# NOTE(review): the key is only as unique as that triple — two fills with the
# same time, price and type would collapse into one key.
df1['key'] = df1['time'].astype(str) + '_' + df1['price'].astype(str) + '_' + df1['type']
df2['key'] = df2['time'].astype(str) + '_' + df2['price'].astype(str) + '_' + df2['type']

# Records present in df2 but not in df1.
extra_in_df2 = df2[~df2['key'].isin(df1['key'])]
print(f"\n文件2中多出的 {len(extra_in_df2)} 条记录:")
print(extra_in_df2[['time', 'oid', 'price', 'volume', 'type', 'pos', 'hedge_oid']])

# Records present in df1 but not in df2 (if any).
extra_in_df1 = df1[~df1['key'].isin(df2['key'])]
if len(extra_in_df1) > 0:
    print(f"\n文件1中有但文件2中没有的 {len(extra_in_df1)} 条记录:")
    print(extra_in_df1[['time', 'oid', 'price', 'volume', 'type', 'pos', 'hedge_oid']])

In [None]:
df2[df2['oid']==41367]

In [None]:
df1[df1['time'] == '2025-11-02 02:03:24.194000']

In [None]:
# 选中df1 hedge_oid为空的行
df1_null_hedge = df1[df1['hedge_oid'].isna()]
df2_null_hedge = df2[df2['hedge_oid'].isna()]


In [None]:
df1_null_hedge

In [None]:
df1[~df1['hedge_oid'].isna()]

In [None]:
df2[~df2['hedge_oid'].isna()]

In [None]:
13298-11988

In [None]:
df2_null_hedge

In [None]:
df1.loc[4095:4120]

In [None]:
df2.loc[4095:4120]

In [None]:
df

In [None]:
extra_in_df2

In [None]:
# Hedge pairing summary: how many rows in each file carry a hedge_oid.
print("\n对冲配对分析:")
print(f"文件1: 有对冲的订单 {df1['hedge_oid'].notna().sum()} 条")
print(f"文件2: 有对冲的订单 {df2['hedge_oid'].notna().sum()} 条")

# Check for duplicated oid values.
print(f"\n文件1 OID重复数: {df1['oid'].duplicated().sum()}")
print(f"文件2 OID重复数: {df2['oid'].duplicated().sum()}")

# Show the hedge columns of the records that only appear in file 2.
if len(extra_in_df2) > 0:
    print("\n多出记录的对冲情况:")
    print(extra_in_df2[['time', 'oid', 'type', 'hedge_oid']].head(20))

In [None]:
df1

In [None]:
df1[df1['type']=='Maker_bid'].loc[4100:4150]

In [None]:
df2[df2['type']=='Maker_bid'].loc[4100:4150]

In [None]:
df2[df2['type']=='Maker_ask']

In [None]:
df1[df1['type']=='Maker_ask']

In [None]:

# Basic comparison: row counts of the two files.
print("=" * 50)
print("基本信息对比:")
print(f"文件1总条数: {len(df1)}")
print(f"文件2总条数: {len(df2)}")
print(f"差异: {len(df2) - len(df1)} 条")
print("=" * 50)

# Distribution of trade types in each file.
print("\n交易类型分布:")
print("文件1:")
print(df1['type'].value_counts())
print("\n文件2:")
print(df2['type'].value_counts())
print("=" * 50)

# Time range covered by each file.
print("\n时间范围:")
print(f"文件1: {df1['time'].min()} 到 {df1['time'].max()}")
print(f"文件2: {df2['time'].min()} 到 {df2['time'].max()}")

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# 两个json文件路径
from utils_Sep import *
# Build `funding_diff`: OKX minus Binance funding rate per funding timestamp,
# over the 30 days ending at `last_time`; then load the two trade logs.
symbol = 'ETH' 

okx_csv     = f'/Volumes/T7/Obentech/fundingRateData/okx/{symbol}-USDT-SWAP.csv'
binance_csv = f'/Volumes/T7/Obentech/fundingRateData/binance/{symbol}USDT.csv'
last_time = pd.to_datetime('2025-11-07 04:15:00')
start_time = last_time - pd.Timedelta(days=30) 
# process_funding_time_v3 is defined outside this cell (presumably in utils_Sep);
# the code below relies on it returning Time, FundingTime and FundingRate columns.
df_okx     = process_funding_time_v3(okx_csv, 'okx')
df_binance = process_funding_time_v3(binance_csv, 'binance')

# Restrict both exchanges to [start_time, last_time).
df_b = df_binance[(df_binance['Time'] >= start_time) & (df_binance['Time'] < last_time)].copy()
df_o = df_okx[(df_okx['Time'] >= start_time) & (df_okx['Time'] < last_time)].copy()


# Keep one row per funding timestamp (the last one) and total the rates.
df_o = df_o.drop_duplicates(subset='FundingTime', keep='last')
sum_okx = df_o['FundingRate'].sum()
df_b = df_b.drop_duplicates(subset='FundingTime', keep='last')
sum_bnb = df_b['FundingRate'].sum()
earn    = sum_okx - sum_bnb
day_start = last_time - pd.Timedelta(days=1)

# Same totals restricted to the final day of the window.
sum_okx1 = df_o[df_o['Time'] >= day_start]['FundingRate'].sum()
sum_bnb1 = df_b[df_b['Time'] >= day_start]['FundingRate'].sum()
earn_1day = sum_okx1 - sum_bnb1

# Funding interval in whole hours, taken from the last two funding timestamps
# of each exchange.
funding_interval_bn = int((df_b.iloc[-1]['FundingTime'] - df_b.iloc[-2]['FundingTime']).total_seconds() / 3600)
funding_interval_okx = int((df_o.iloc[-1]['FundingTime'] - df_o.iloc[-2]['FundingTime']).total_seconds() / 3600)

df_o.rename(columns={'FundingRate': 'FundingRate_okx'}, inplace=True)
df_b.rename(columns={'FundingRate': 'FundingRate_binance'}, inplace=True)

# Align the two series on FundingTime. When the intervals differ, the exchange
# with the shorter interval is summed up into the longer interval's buckets
# (right-labelled, right-closed) before the join.
if funding_interval_bn == funding_interval_okx:
    funding_diff = df_b[['FundingTime', 'FundingRate_binance']].set_index('FundingTime').join(df_o[['FundingTime', 'FundingRate_okx']].set_index('FundingTime'), how='left')
elif funding_interval_bn > funding_interval_okx:
    df_o_agg = df_o.set_index('FundingTime').resample(f'{funding_interval_bn}h', label='right', closed='right')['FundingRate_okx'].sum().to_frame()
    funding_diff = df_b[['FundingTime', 'FundingRate_binance']].set_index('FundingTime').join(df_o_agg, how='left')
else:
    df_b_agg = df_b.set_index('FundingTime').resample(f'{funding_interval_okx}h', label='right', closed='right')['FundingRate_binance'].sum().to_frame()
    funding_diff = df_o[['FundingTime', 'FundingRate_okx']].set_index('FundingTime').join(df_b_agg, how='left')


funding_diff['funding_diff']     = funding_diff['FundingRate_okx'] - funding_diff['FundingRate_binance']




# Paths of the two JSON-lines trade logs to compare.
json_path1 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-12-04/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0_-0.00007_0.00008_朴素)在next里改)修正了check_signal版本.json'
json_path2 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-12-05/2025-11-01-2025-11-05_1764978282.133318.json'
# Load the data (one JSON object per line).
trade_record1 = pd.read_json(json_path1, lines=True)
trade_record2 = pd.read_json(json_path2, lines=True)


# Normalise the time column to datetimes; unparseable values become NaT.
trade_record1['time'] = pd.to_datetime(trade_record1['time'], errors='coerce')
trade_record2['time'] = pd.to_datetime(trade_record2['time'], errors='coerce')
def total_pnl(trade_record,funding_diff):
    """Add cumulative funding PnL to a trade record.

    Args:
        trade_record: DataFrame with at least ``time``, ``pos``, ``price`` and
            ``net`` columns. It does not need to be pre-sorted.
        funding_diff: DataFrame indexed by ``FundingTime`` with a
            ``funding_diff`` column.

    Returns:
        ``(final_result, df)`` where ``final_result`` is the time-sorted trade
        record with ``FundingTime``, ``cum_fr_pnl`` and ``total_pnl``
        (``net + cum_fr_pnl``) added, and ``df`` has one row per funding event
        inside the trading window with ``fr_pnl`` / ``cum_fr_pnl``.
        Rows of ``final_result`` containing NaN in any column are dropped; this
        includes trades that precede the first funding event in the window.
    """
    # Sort first: the window bounds below are read from the first/last row, so
    # they are only the earliest/latest trade times once the frame is ordered.
    trade_record = trade_record.sort_values('time')
    first_trade_time = trade_record['time'].iloc[0]
    last_trade_time = trade_record['time'].iloc[-1]

    funding_diff = funding_diff.sort_values('FundingTime')
    in_window = (funding_diff.index <= last_trade_time) & (funding_diff.index >= first_trade_time)
    funding_diff = funding_diff[in_window]

    # For each funding event, pick up the position/price of the latest trade
    # at or before it.
    df = pd.merge_asof(
        funding_diff.reset_index(),
        trade_record[['time', 'pos','price']],
        left_on='FundingTime',
        right_on='time',
        direction='backward'
    )

    df['fr_pnl'] = df['funding_diff'] * df['pos'] * df['price']
    df['cum_fr_pnl'] = df['fr_pnl'].cumsum()

    # For each trade, pick up the funding PnL accumulated up to that time.
    final_result = pd.merge_asof(trade_record,df[['FundingTime','cum_fr_pnl']],left_on='time',right_on='FundingTime', direction='backward')

    final_result = final_result.dropna()
    final_result['total_pnl'] = final_result['net'] + final_result['cum_fr_pnl']
    return final_result, df

def process_trade_record(trade_record, rebate_rate=0.0001):
    """Pair hedge orders with the orders they reference and compute running PnL.

    The input frame is not modified; all work happens on a copy.

    Args:
        trade_record: DataFrame with ``oid``, ``price``, ``volume``, ``type``
            and ``hedge_oid`` columns.
        rebate_rate: Rate applied to each hedge row's notional
            (``volume * price``) and *added* cumulatively to ``net``.
            Defaults to the previously hard-coded 0.0001.
            NOTE(review): presumably a maker rebate — confirm the sign.

    Returns:
        DataFrame with one row per hedge order (rows with non-null
        ``hedge_oid``), joined with the referenced order's columns (suffix
        ``_base``) and with these columns added:
        ``flag`` (-1 for type ``'Maker_ask'``, otherwise 1),
        ``pos`` (cumulative ``volume * flag``, overwriting any existing ``pos``),
        ``spread`` (``price - price_base``), ``RSR`` (``spread / price_base``)
        and ``net``. Rows with NaN in any column are dropped.
    """
    # Work on a copy so the caller's frame does not silently gain a `flag` column.
    records = trade_record.copy()
    records['flag'] = 1
    records.loc[records['type'] == 'Maker_ask', 'flag'] = -1

    df_h = records[records['hedge_oid'].notna()]
    df_base = records[['oid', 'price', 'volume']]
    df_merge = df_h.merge(df_base,left_on='hedge_oid',right_on='oid',how='left',suffixes=('', '_base'))

    df_merge['pos'] = (df_merge['volume'] * df_merge['flag']).cumsum()
    df_merge['spread'] = df_merge['price'] - df_merge['price_base']
    df_merge['RSR'] = df_merge['spread'] / df_merge['price_base']
    # net = cumulative flag*volume*spread + pos*spread + cumulative rate * notional.
    df_merge['net'] = (df_merge['flag'] * df_merge['volume'] * df_merge['spread']).cumsum() + df_merge['pos'] * df_merge['spread'] + (df_merge['volume'] * df_merge['price'] * rebate_rate).cumsum()
    df_merge.dropna(inplace=True)
    return df_merge
# Annualised return and daily turnover.

def calc_stats(df, principal,target_column='net'):
    """Return ``(annualised_return, daily_turnover, days)`` for a trade record.

    Only rows with ``volume > 0`` are considered. ``days`` is the span between
    the first and last remaining row's ``time``; a zero span is replaced by one
    hour (1/24 day) to avoid dividing by zero. The PnL is the last value of
    ``target_column``. An empty selection yields ``(0, 0, 0)``.
    """
    traded = df.loc[df['volume'] > 0].copy()
    if traded.empty:
        return 0, 0, 0

    timestamps = traded['time']
    elapsed = timestamps.iloc[-1] - timestamps.iloc[0]
    days = elapsed.total_seconds() / 86400
    if days == 0:
        days = 1/24  # guard against division by zero

    final_pnl = traded[target_column].iloc[-1]
    notional = (traded['volume'] * traded['price']).sum()

    ann_return = final_pnl / principal / days * 365
    turnover = notional / principal / days
    return ann_return, turnover, days

# Capital base: every annualised return / turnover figure below is divided by it.
principal = 45_00_000
trade_record1 = process_trade_record(trade_record1)
trade_record2 = process_trade_record(trade_record2)




# Attach cumulative funding PnL; df1/df2 hold the per-funding-event breakdown.
trade_record1, df1 = total_pnl(trade_record1,funding_diff)
trade_record2, df2 = total_pnl(trade_record2,funding_diff)

ann_return1, turnover1, days1 = calc_stats(trade_record1, principal)
ann_return2, turnover2, days2 = calc_stats(trade_record2, principal)


total_ann_return1, _, _ = calc_stats(trade_record1, principal,target_column='total_pnl')
total_ann_return2, _, _ = calc_stats(trade_record2, principal,target_column='total_pnl')


# Plot total PnL, sampled only at rows where the position is flat (pos == 0).
plt.figure(figsize=(12, 6))
plt.plot(trade_record1[trade_record1['pos'] == 0]['time'], trade_record1[trade_record1['pos'] == 0]['total_pnl'], label=f'BM\nret: {total_ann_return1:.2%} turnover: {turnover1:.2f}', color='tab:blue')
plt.plot(trade_record2[trade_record2['pos'] == 0]['time'], trade_record2[trade_record2['pos'] == 0]['total_pnl'], label=f'Improve\nret: {total_ann_return2:.2%} turnover: {turnover2:.2f}', color='tab:orange')
plt.xlabel('Time')
plt.ylabel('PnL')
# No time cutoff is applied in this cell, so the title carries no date; the
# capital is taken from `principal` so it matches the legend's returns.
plt.title(f'PnL Curve\n(Initial Capital {principal})')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


# Plot cumulative funding PnL, annualised over each record's trading span.

funding_ret1 = df1['cum_fr_pnl'].iloc[-1]/principal/days1*365
funding_ret2 = df2['cum_fr_pnl'].iloc[-1]/principal/days2*365


plt.figure(figsize=(12, 6))
plt.plot(df1['FundingTime'], df1['cum_fr_pnl'], label=f'BenchMark\nret: {funding_ret1:.2%}', color='tab:blue')
plt.plot(df2['FundingTime'], df2['cum_fr_pnl'], label=f'Improve\nret: {funding_ret2:.2%}', color='tab:orange')
plt.xlabel('Time')
plt.ylabel('PnL')
plt.title('Funding PnL Curve')
plt.legend()

In [None]:
# Plot the `pos` series of trade_record1 and trade_record2 over time in a single
# plotly figure.

import plotly.graph_objs as go
from plotly.subplots import make_subplots

# Convert the time column to datetime first if it is not already (optional).
# trade_record1['T'] = pd.to_datetime(trade_record1['T'])
# trade_record2['T'] = pd.to_datetime(trade_record2['T'])

# Single subplot with no secondary y-axis: both traces share one axis.
fig = make_subplots(specs=[[{"secondary_y": False}]])

fig.add_trace(
    go.Scatter(
        x=trade_record1['time'], 
        y=trade_record1['pos'],
        mode='lines',
        name='pos_1'
    )
)

fig.add_trace(
    go.Scatter(
        x=trade_record2['time'], 
        y=trade_record2['pos'],
        mode='lines',
        name='pos_2'
    )
)

fig.update_layout(
    title="Position Curve Over Time",
    xaxis_title="Time",
    yaxis_title="Position",
    hovermode="x unified"
)

fig.show()


In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Paths of the two backtest logs to compare (one JSON object per line).
json_path1 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-12-04/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0_-0.00007_0.00008_朴素)在next里改)修正了check_signal版本.json'
json_path2 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-12-04/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0_-0.00007_0.00008_resilience_0.00015)在next里改)修正了check_signal版本.json'

# Load both logs.
trade_record1, trade_record2 = (
    pd.read_json(_path, lines=True) for _path in (json_path1, json_path2)
)

for _record in (trade_record1, trade_record2):
    # Normalise the timestamp column; unparseable values become NaT.
    _record['time'] = pd.to_datetime(_record['time'], errors='coerce')
    # Direction flag: -1 for 'Maker_ask' rows, +1 for everything else.
    _record['flag'] = 1
    _record.loc[_record['type'] == 'Maker_ask', 'flag'] = -1

# Rows that carry a hedge_oid, i.e. fills that reference another order.
df_h1 = trade_record1[trade_record1['hedge_oid'].notna()].copy()
df_h2 = trade_record2[trade_record2['hedge_oid'].notna()].copy()

# The join of these rows with their base order and the derived
# pos / spread / RSR / net columns is implemented by process_trade_record.

In [None]:
trade_record2[~trade_record2['hedge_oid'].notna()].copy()

In [None]:
trade_record1[~trade_record1['hedge_oid'].notna()].copy()

In [None]:
df_h1

In [None]:
df_h2

In [None]:
# Overlay the position ('pos') of trade_record1 and trade_record2 over time
# in a single interactive plotly figure.

import plotly.graph_objs as go
from plotly.subplots import make_subplots

# The 'time' column is plotted as-is; convert it with pd.to_datetime first
# if it is not already a datetime column.

fig = make_subplots(specs=[[{"secondary_y": False}]])

# One line trace per trade record, added in the order pos_1, pos_2.
for _trace_name, _record in (('pos_1', trade_record1), ('pos_2', trade_record2)):
    fig.add_trace(
        go.Scatter(
            x=_record['time'],
            y=_record['pos'],
            mode='lines',
            name=_trace_name,
        )
    )

fig.update_layout(
    hovermode="x unified",
    yaxis_title="Position",
    xaxis_title="Time",
    title="Position Curve Over Time",
)

fig.show()


In [None]:
trade_record1[trade_record1['oid'] == 41369.00]

In [None]:
trade_record1[trade_record1['hedge_oid'] == 41369.00]

In [None]:
trade_record2['slippage'].describe()

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-12-04/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0_-0.00007_0.00008_朴素)在next里改）.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-12-04/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0_-0.00007_0.00008_resilience_0.00015)在next里改）.json')

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-12-03/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0_-0.00007_0.00008_resilience_0.00015).json')

In [None]:
# Build `funding_diff`: the per-settlement funding-rate difference
# (OKX minus Binance) for `symbol` over the 30 days ending at `last_time`.
# process_funding_time_v3 is presumably provided by utils_Sep; from its use
# below it returns a frame with 'Time', 'FundingTime' and 'FundingRate'
# columns -- TODO confirm against the helper.
from utils_Sep import *
symbol = 'ETH' 

okx_csv     = f'/Volumes/T7/Obentech/fundingRateData/okx/{symbol}-USDT-SWAP.csv'
binance_csv = f'/Volumes/T7/Obentech/fundingRateData/binance/{symbol}USDT.csv'
# Analysis window: [last_time - 30 days, last_time)
last_time = pd.to_datetime('2025-11-26 04:15:00')
start_time = last_time - pd.Timedelta(days=30) 
df_okx     = process_funding_time_v3(okx_csv, 'okx')
df_binance = process_funding_time_v3(binance_csv, 'binance')

# Restrict both exchanges to the window (start inclusive, end exclusive).
df_b = df_binance[(df_binance['Time'] >= start_time) & (df_binance['Time'] < last_time)].copy()
df_o = df_okx[(df_okx['Time'] >= start_time) & (df_okx['Time'] < last_time)].copy()


# Keep only the last record per funding settlement, then sum the rates.
df_o = df_o.drop_duplicates(subset='FundingTime', keep='last')
sum_okx = df_o['FundingRate'].sum()
df_b = df_b.drop_duplicates(subset='FundingTime', keep='last')
sum_bnb = df_b['FundingRate'].sum()
# 30-day cumulative rate difference (OKX minus Binance).
earn    = sum_okx - sum_bnb
day_start = last_time - pd.Timedelta(days=1)

# Same difference restricted to the final 24 hours of the window.
sum_okx1 = df_o[df_o['Time'] >= day_start]['FundingRate'].sum()
sum_bnb1 = df_b[df_b['Time'] >= day_start]['FundingRate'].sum()
earn_1day = sum_okx1 - sum_bnb1

# Funding interval in whole hours, inferred from the last two settlements of
# each exchange (int() truncates; needs at least two rows per exchange).
funding_interval_bn = int((df_b.iloc[-1]['FundingTime'] - df_b.iloc[-2]['FundingTime']).total_seconds() / 3600)
funding_interval_okx = int((df_o.iloc[-1]['FundingTime'] - df_o.iloc[-2]['FundingTime']).total_seconds() / 3600)

# Give the rate columns exchange-specific names so they can sit side by side.
df_o.rename(columns={'FundingRate': 'FundingRate_okx'}, inplace=True)
df_b.rename(columns={'FundingRate': 'FundingRate_binance'}, inplace=True)

# Align the two series on FundingTime. When the intervals differ, the
# faster-settling exchange is summed into right-labelled, right-closed bins
# of the slower interval and left-joined onto the slower exchange's times.
if funding_interval_bn == funding_interval_okx:
    funding_diff = df_b[['FundingTime', 'FundingRate_binance']].set_index('FundingTime').join(df_o[['FundingTime', 'FundingRate_okx']].set_index('FundingTime'), how='left')
elif funding_interval_bn > funding_interval_okx:
    df_o_agg = df_o.set_index('FundingTime').resample(f'{funding_interval_bn}h', label='right', closed='right')['FundingRate_okx'].sum().to_frame()
    funding_diff = df_b[['FundingTime', 'FundingRate_binance']].set_index('FundingTime').join(df_o_agg, how='left')
else:
    df_b_agg = df_b.set_index('FundingTime').resample(f'{funding_interval_okx}h', label='right', closed='right')['FundingRate_binance'].sum().to_frame()
    funding_diff = df_o[['FundingTime', 'FundingRate_okx']].set_index('FundingTime').join(df_b_agg, how='left')


# Per-settlement rate difference; NaN where the left join found no partner.
funding_diff['funding_diff']     = funding_diff['FundingRate_okx'] - funding_diff['FundingRate_binance']

In [None]:
analyze_slippage_backtest('/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-10/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0__-0.00007_0.00008_朴素模式.json')

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Paths of the two backtest logs to compare (one JSON object per line).
json_path1 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-10/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0__-0.00007_0.00008_朴素模式.json'
json_path2 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-12-03/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0_-0.00007_0.00008_resilience_0.00015.json'

# Load both logs.
trade_record1, trade_record2 = (
    pd.read_json(_path, lines=True) for _path in (json_path1, json_path2)
)

# Normalise the timestamp column; unparseable values become NaT.
for _record in (trade_record1, trade_record2):
    _record['time'] = pd.to_datetime(_record['time'], errors='coerce')



def process_trade_record(trade_record):
    """Pair each hedge fill with the order it hedges and derive position/PnL columns.

    The input frame is left untouched: all work happens on a copy, so calling
    this function repeatedly on the same frame is safe.

    Args:
        trade_record: DataFrame of fills. The columns ``type``, ``oid``,
            ``hedge_oid``, ``price`` and ``volume`` are read.

    Returns:
        DataFrame with one row per fill whose ``hedge_oid`` is set, left-joined
        to the row whose ``oid`` equals that ``hedge_oid`` (the joined columns
        carry the ``_base`` suffix), plus the derived columns:

        * ``flag``   -- -1 for ``type == 'Maker_ask'``, +1 otherwise
        * ``pos``    -- running sum of ``volume * flag``
        * ``spread`` -- ``price - price_base``
        * ``RSR``    -- ``spread / price_base``
        * ``net``    -- cumulative ``flag * volume * spread``, plus
          ``pos * spread``, plus cumulative ``volume * price * 0.0001``

        Rows containing a NaN in any column are dropped.
    """
    # Work on a copy so the caller's frame does not silently gain a 'flag' column.
    records = trade_record.copy()
    records['flag'] = 1
    records.loc[records['type'] == 'Maker_ask', 'flag'] = -1

    hedge_fills = records[records['hedge_oid'].notna()]
    base_orders = records[['oid', 'price', 'volume']]
    merged = hedge_fills.merge(
        base_orders,
        left_on='hedge_oid',
        right_on='oid',
        how='left',
        suffixes=('', '_base'),
    )

    merged['pos'] = (merged['volume'] * merged['flag']).cumsum()
    merged['spread'] = merged['price'] - merged['price_base']
    merged['RSR'] = merged['spread'] / merged['price_base']
    # NOTE(review): 0.0001 of notional is added per fill -- presumably a
    # rebate/fee rate; confirm the sign and the rate with the strategy owner.
    merged['net'] = (
        (merged['flag'] * merged['volume'] * merged['spread']).cumsum()
        + merged['pos'] * merged['spread']
        + (merged['volume'] * merged['price'] * 0.0001).cumsum()
    )
    return merged.dropna()
# 计算年化收益率和换手率

def calc_stats(df, principal,target_column='net'):
    """Return (annualised return, daily turnover, elapsed days) for a trade record.

    Only rows with ``volume > 0`` are considered. The elapsed time runs from
    the first to the last such row's ``time``; the return uses the last value
    of ``target_column`` and turnover uses the summed ``volume * price``, both
    normalised by ``principal``. Returns ``(0, 0, 0)`` when no row has volume.
    """
    traded = df.loc[df['volume'] > 0]
    if traded.empty:
        return 0, 0, 0

    timestamps = traded['time']
    days = (timestamps.iloc[-1] - timestamps.iloc[0]).total_seconds() / 86400
    if days == 0:
        # Fall back to one hour so the divisions below never hit zero.
        days = 1/24

    notional = (traded['volume'] * traded['price']).sum()
    final_pnl = traded[target_column].iloc[-1]

    ann_return = final_pnl / principal / days * 365
    turnover = notional / principal / days
    return ann_return, turnover, days

# Capital base used to normalise returns and turnover (4.5 million).
principal = 4_500_000

# Pair hedge fills with their base orders and derive pos / spread / net.
trade_record1 = process_trade_record(trade_record1)
trade_record2 = process_trade_record(trade_record2)

# Attach cumulative funding PnL; df1/df2 hold the per-settlement funding PnL.
trade_record1, df1 = total_pnl(trade_record1,funding_diff)
trade_record2, df2 = total_pnl(trade_record2,funding_diff)

# Annualised return and daily turnover on the trading PnL ('net') ...
ann_return1, turnover1, days1 = calc_stats(trade_record1, principal)
ann_return2, turnover2, days2 = calc_stats(trade_record2, principal)

# ... and the annualised return on trading + funding PnL ('total_pnl').
total_ann_return1, _, _ = calc_stats(trade_record1, principal,target_column='total_pnl')
total_ann_return2, _, _ = calc_stats(trade_record2, principal,target_column='total_pnl')


# Figure 1: total PnL curves of both runs.
# (To plot only the rows where the position is flat, filter each record with
# trade_record['pos'] == 0 before plotting.)
plt.figure(figsize=(12, 6))
plt.plot(trade_record1['time'], trade_record1['total_pnl'], label=f'BM\nret: {total_ann_return1:.2%} turnover: {turnover1:.2f}', color='tab:blue')
plt.plot(trade_record2['time'], trade_record2['total_pnl'], label=f'Resilience\nret: {total_ann_return2:.2%} turnover: {turnover2:.2f}', color='tab:orange')

plt.xlabel('Time')
plt.ylabel('PnL')
# The title is derived from `principal` so it cannot drift from the value
# actually used in the statistics above.
plt.title(f'PnL Curve\n(Initial Capital {principal})')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


# Figure 2: cumulative funding PnL, annualised over each run's elapsed days.
funding_ret1 = df1['cum_fr_pnl'].iloc[-1]/principal/days1*365
funding_ret2 = df2['cum_fr_pnl'].iloc[-1]/principal/days2*365

plt.figure(figsize=(12, 6))
plt.plot(df1['FundingTime'], df1['cum_fr_pnl'], label=f'BM\nret: {funding_ret1:.2%}', color='tab:blue')
plt.plot(df2['FundingTime'], df2['cum_fr_pnl'], label=f'Resilience\nret: {funding_ret2:.2%}', color='tab:orange')
plt.xlabel('Time')
plt.ylabel('PnL')
plt.title('Funding PnL Curve')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Paths of the two backtest logs to compare (one JSON object per line).
json_path1 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-11-10/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0__-0.00007_0.00008_朴素模式.json'
json_path2 = '/Users/rayxu/Downloads/nuts_am/log/ETH/25-12-03/2025-11-01-2025-11-05_ETH_0.0_2000.0_5_False_False_False_0.0_0_-0.00007_0.00008_resilience_0.00015.json'

# Load both logs.
trade_record1, trade_record2 = (
    pd.read_json(_path, lines=True) for _path in (json_path1, json_path2)
)

# Normalise the timestamp column; unparseable values become NaT.
for _record in (trade_record1, trade_record2):
    _record['time'] = pd.to_datetime(_record['time'], errors='coerce')

In [None]:
trade_record1

In [None]:
process_trade_record(trade_record1)['pos'].plot()

In [None]:
trade_record2['pos'].plot()

In [None]:
def total_pnl(trade_record,funding_diff):
    """Add cumulative funding PnL to a trade record.

    Args:
        trade_record: DataFrame with at least ``time``, ``pos``, ``price`` and
            ``net`` columns. It does not need to be sorted.
        funding_diff: DataFrame indexed by ``FundingTime`` with a
            ``funding_diff`` column (funding-rate difference per settlement).

    Returns:
        (final_result, df) where

        * ``df`` has one row per funding settlement inside the traded time
          span, with the most recent trade's ``pos``/``price`` at or before
          that settlement, ``fr_pnl = funding_diff * pos * price`` and its
          running sum ``cum_fr_pnl``;
        * ``final_result`` is ``trade_record`` sorted by ``time`` with the
          latest ``cum_fr_pnl`` at or before each trade and
          ``total_pnl = net + cum_fr_pnl``. Rows containing any NaN (e.g.
          trades before the first settlement) are dropped.
    """
    # Sort first: the window bounds below must be the earliest and latest
    # trade times, not whatever happens to sit in the first/last row.
    trade_record = trade_record.sort_values('time')
    first_trade_time = trade_record['time'].iloc[0]
    last_trade_time = trade_record['time'].iloc[-1]

    funding_diff = funding_diff.sort_values('FundingTime')
    in_window = (funding_diff.index <= last_trade_time) & (funding_diff.index >= first_trade_time)
    funding_diff = funding_diff[in_window]

    # Position and price in force at each funding settlement.
    df = pd.merge_asof(
        funding_diff.reset_index(),
        trade_record[['time', 'pos','price']],
        left_on='FundingTime',
        right_on='time',
        direction='backward'
    )

    df['fr_pnl'] = df['funding_diff'] * df['pos'] * df['price']
    df['cum_fr_pnl'] = df['fr_pnl'].cumsum()

    # Funding PnL accumulated up to each trade.
    final_result = pd.merge_asof(
        trade_record,
        df[['FundingTime','cum_fr_pnl']],
        left_on='time',
        right_on='FundingTime',
        direction='backward'
    )

    final_result = final_result.dropna()
    final_result['total_pnl'] = final_result['net'] + final_result['cum_fr_pnl']
    return final_result, df

In [None]:
trade_record1 = process_trade_record(trade_record1)

trade_record1, df1 = total_pnl(trade_record1,funding_diff)

In [None]:
trade_record1

## 实盘滑点

In [None]:
_,_,_,df = analyze_slippage('/Users/rayxu/Downloads/order.arbitrage_eth_okx_binance_09_2 (13).csv','/Users/rayxu/Downloads/order.arbitrage_eth_okx_binance_09_2 (14).csv','/Users/rayxu/Downloads/order.arbitrage_eth_okx_binance_09_2 (15).csv')

In [None]:
df.columns

In [None]:
df

In [None]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots


# Pre-processing: parse timestamps, derive the calendar date and force the two
# analysed columns to numeric (unparseable values become NaN).
df['Createtime'] = pd.to_datetime(df['Createtime'])
df['date'] = df['Createtime'].dt.date
for _numeric_col in ('slippage', 'HedgingTimeUsed'):
    df[_numeric_col] = pd.to_numeric(df[_numeric_col], errors='coerce')

# All metrics below are computed per calendar day.
grouped = df.groupby('date')

# 1. Daily slippage percentiles, one column per percentile.
percentiles = [1, 5, 10, 25, 50, 55, 60, 65, 75, 95, 99]
percentile_data = {
    f'percentile_{p}': grouped['slippage'].quantile(p/100) for p in percentiles
}
percentile_df = pd.DataFrame(percentile_data)

# 2. Daily mean slippage.
mean_df = grouped['slippage'].mean()


def _share_of_hedged(day, mask):
    """Rows selected by `mask` as a fraction of rows with a HedgingTimeUsed value."""
    return mask.sum() / day['HedgingTimeUsed'].notna().sum()


# 3. Daily share of fills with HedgingTimeUsed <= 5: once in percent (for the
#    ratio chart) and once as a plain fraction (for the weighting below),
#    plus the complementary share with HedgingTimeUsed > 5.
hedging_ratio = grouped.apply(
    lambda day: _share_of_hedged(day, day['HedgingTimeUsed'] <= 5) * 100
)
hedging_ratio_lte5 = grouped.apply(
    lambda day: _share_of_hedged(day, day['HedgingTimeUsed'] <= 5)
)
hedging_ratio_gt5 = grouped.apply(
    lambda day: _share_of_hedged(day, day['HedgingTimeUsed'] > 5)
)

# 4. Daily mean slippage of each HedgingTimeUsed bucket, and the same mean
#    weighted by the bucket's share.
slippage_lte5 = grouped.apply(
    lambda day: day.loc[day['HedgingTimeUsed'] <= 5, 'slippage'].mean()
)
slippage_gt5 = grouped.apply(
    lambda day: day.loc[day['HedgingTimeUsed'] > 5, 'slippage'].mean()
)

weighted_slippage_lte5 = hedging_ratio_lte5 * slippage_lte5
weighted_slippage_gt5 = hedging_ratio_gt5 * slippage_gt5

# 5. Extreme slippage (below -0.001): daily count, and share in percent of
#    the rows that have a slippage value.
extreme_slippage_count = grouped.apply(
    lambda day: (day['slippage'] < -0.001).sum()
)
extreme_slippage_ratio = grouped.apply(
    lambda day: (day['slippage'] < -0.001).sum() / day['slippage'].notna().sum() * 100
)


# Plotting: one interactive plotly figure per daily metric.

def _daily_line_figure(traces, **layout):
    """Build a lines+markers figure.

    `traces` is a sequence of (series, legend name, line-style dict); each
    series is drawn against its own index. `layout` is forwarded to
    update_layout.
    """
    figure = go.Figure()
    for series, legend_name, line_style in traces:
        figure.add_trace(go.Scatter(
            x=series.index,
            y=series.values,
            mode='lines+markers',
            name=legend_name,
            line=line_style,
        ))
    figure.update_layout(**layout)
    return figure


# Figure 1: daily slippage percentiles (one line per percentile column).
fig1 = go.Figure()
for _column_name, _column in percentile_df.items():
    fig1.add_trace(go.Scatter(
        x=percentile_df.index,
        y=_column,
        mode='lines+markers',
        name=_column_name.replace('percentile_', 'P')
    ))
fig1.update_layout(
    title='每日滑点分位数分布',
    xaxis_title='日期',
    yaxis_title='滑点',
    height=600
)
fig1.show()

# Figure 2: daily mean slippage.
fig2 = _daily_line_figure(
    [(mean_df, '滑点均值', dict(color='rgb(55, 128, 191)'))],
    title='每日滑点均值',
    xaxis_title='日期',
    yaxis_title='滑点均值',
    height=500,
)
fig2.show()

# Figure 3: daily share (in percent) of fills with HedgingTimeUsed <= 5.
fig3 = _daily_line_figure(
    [(hedging_ratio, '比例', dict(color='rgb(219, 64, 82)'))],
    title='每日HedgingTimeUsed≤5的比例',
    xaxis_title='日期',
    yaxis_title='比例 (%)',
    height=500,
)
fig3.show()

# Figure 4: daily mean slippage of the two HedgingTimeUsed buckets.
fig4 = _daily_line_figure(
    [
        (slippage_lte5, 'HedgingTimeUsed ≤ 5', dict(color='rgb(50, 171, 96)')),
        (slippage_gt5, 'HedgingTimeUsed > 5', dict(color='rgb(255, 127, 14)')),
    ],
    title='每日分组滑点均值对比',
    xaxis_title='日期',
    yaxis_title='滑点均值',
    height=500,
)
fig4.show()

# Figure 6: share-weighted slippage of the two buckets.
fig6 = _daily_line_figure(
    [
        (weighted_slippage_lte5, 'HedgingTimeUsed ≤ 5 (比例×滑点)', dict(color='rgb(50, 171, 96)', width=2)),
        (weighted_slippage_gt5, 'HedgingTimeUsed > 5 (比例×滑点)', dict(color='rgb(255, 127, 14)', width=2)),
    ],
    title='每日比例加权滑点对比',
    xaxis_title='日期',
    yaxis_title='比例 × 滑点均值',
    height=500,
    hovermode='x unified',
)
fig6.show()


# Figure 7: extreme slippage on two y-axes -- bars for the daily count
# (left axis), a line for the daily share in percent (right axis).
fig7 = make_subplots(specs=[[{"secondary_y": True}]])

_extreme_count_bars = go.Bar(
    x=extreme_slippage_count.index,
    y=extreme_slippage_count.values,
    name='极端滑点条数',
    marker_color='rgb(239, 85, 59)',
    opacity=0.7
)
_extreme_ratio_line = go.Scatter(
    x=extreme_slippage_ratio.index,
    y=extreme_slippage_ratio.values,
    mode='lines+markers',
    name='极端滑点比例',
    line=dict(color='rgb(99, 110, 250)', width=3),
    marker=dict(size=8)
)
fig7.add_trace(_extreme_count_bars, secondary_y=False)
fig7.add_trace(_extreme_ratio_line, secondary_y=True)

fig7.update_xaxes(title_text="日期")
fig7.update_yaxes(title_text="条数", secondary_y=False)
fig7.update_yaxes(title_text="比例 (%)", secondary_y=True)

# Horizontal legend placed above the plot area, right-aligned.
_legend_above_plot = dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1
)
fig7.update_layout(
    title='每日极端滑点分析 (滑点 < -0.001)',
    height=500,
    hovermode='x unified',
    legend=_legend_above_plot
)
fig7.show()

# # 图5: 分组滑点均值对比(柱状图)
# fig5 = go.Figure()
# fig5.add_trace(go.Bar(
#     x=slippage_lte5.index,
#     y=slippage_lte5.values,
#     name='HedgingTimeUsed ≤ 5',
#     marker_color='rgb(50, 171, 96)'
# ))
# fig5.add_trace(go.Bar(
#     x=slippage_gt5.index,
#     y=slippage_gt5.values,
#     name='HedgingTimeUsed > 5',
#     marker_color='rgb(255, 127, 14)'
# ))
# fig5.update_layout(
#     title='每日分组滑点均值对比(柱状图)',
#     xaxis_title='日期',
#     yaxis_title='滑点均值',
#     barmode='group',
#     height=500
# )
# fig5.show()

# 可选: 保存图表为HTML文件
# fig1.write_html('percentiles.html')
# fig2.write_html('mean.html')
# fig3.write_html('hedging_ratio.html')
# fig4.write_html('comparison_line.html')
# fig5.write_html('comparison_bar.html')

# 20251217 抢盘口ETH对比

In [None]:
# Preview the raw order log. pd.read_csv has no `starttime` keyword (passing
# one raises TypeError); time-window filtering is done by analyze_slippage's
# own `starttime` argument instead.
pd.read_csv('/Users/rayxu/Downloads/order.eth_gate_binance_02_2.csv')

In [None]:
analyze_slippage('/Users/rayxu/Downloads/order.eth_gate_binance_02_2.csv',starttime='2025-12-17 21:00:00.000') # 实验组

In [None]:
_,_,_,df = analyze_slippage('/Users/rayxu/Downloads/order.arbitrage_eth_gate_binance_04_2.csv',starttime='2025-12-17 21:00:00.000')

# 20251223 动态对冲时间对比

In [None]:
compare_table, fig, df_a, df_b = analyze_slippage_compare(
    "/Users/rayxu/Downloads/order.btc_gate_binance_01_2.csv",
    "/Users/rayxu/Downloads/order.btc_gate_binance_03_2.csv",
    starttime="2025-12-19 09:59:14",
    endtime="2025-12-24 23:59:59",
    label_a="benchmark",
    label_b="dynamichedging",
    bins=60,
    xlim_quantile=(0.01, 0.99),
)
display(compare_table)


In [None]:
compare_paths=["/Users/rayxu/Downloads/order.btc_gate_binance_01_2.csv", "/Users/rayxu/Downloads/order.btc_gate_binance_03_2.csv", "/Users/rayxu/Downloads/order.btc_gate_binance_02_2.csv"]
compare_labels = ["BenchMark",'SwitchHedingTime','queueSniping']
compare_table, fig, df1, df2, df = analyze_slippage_compare(
    file_paths=compare_paths,
    labels=compare_labels,
    starttime="2025-12-19 09:59:14",
    endtime="2025-12-23 05:00:00",
    bins=60,
    xlim_quantile=(0.01, 0.99),
)
display(compare_table)

In [None]:
# 同期 "2025-12-20 09:15:14"
compare_paths=["/Users/rayxu/Downloads/order.btc_gate_binance_01_2.csv", "/Users/rayxu/Downloads/order.btc_gate_binance_03_2.csv", "/Users/rayxu/Downloads/order.btc_gate_binance_02_2.csv"]
compare_labels = ["BenchMark",'SwitchHedingTime','queueSniping']
compare_table, fig, df1, df2, df = analyze_slippage_compare(
    file_paths=compare_paths,
    labels=compare_labels,
    starttime="2025-12-20 09:15:14",
    endtime="2025-12-23 05:00:00",
    bins=60,
    xlim_quantile=(0.01, 0.99),
)
display(compare_table)

In [None]:

compare_paths=["/Users/rayxu/Downloads/order.btc_gate_binance_01_2 (1).csv", "/Users/rayxu/Downloads/order.btc_gate_binance_03_2 (1).csv"]
compare_labels = ["BenchMark",'SwitchHedingTime']
compare_table, _, _, _= analyze_slippage_compare(
    file_paths=compare_paths,
    labels=compare_labels,
    starttime="2025-12-19 09:59:14",
    endtime="2025-12-24 05:00:00",
    bins=60,
    xlim_quantile=(0.01, 0.99),
)
display(compare_table)

In [None]:
pd.read_csv("/Users/rayxu/Downloads/order.btc_gate_binance_03_2 (1).csv")

In [None]:
df

In [None]:
# 最新数据 + 同期 "2025-12-20 09:15:14"
compare_paths=["/Users/rayxu/Downloads/order.btc_gate_binance_01_2 (1).csv", "/Users/rayxu/Downloads/order.btc_gate_binance_03_2 (1).csv", "/Users/rayxu/Downloads/order.btc_gate_binance_02_2.csv"]
compare_labels = ["BenchMark",'SwitchHedingTime','queueSniping']
compare_table, fig, df1, df2, df = analyze_slippage_compare(
    file_paths=compare_paths,
    labels=compare_labels,
    starttime="2025-12-20 09:15:14",
    endtime="2025-12-24 05:00:00",
    bins=60,
    xlim_quantile=(0.01, 0.99),
)
display(compare_table)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def analyze_slippage_compare(
    file_paths,                 # list[str]: CSV paths; the first file is the matching baseline
    starttime=None,
    endtime=None,
    labels=None,                # list[str]: one display label per file
    bins: int = 50,
    xlim_quantile=(0.01, 0.99),
    show: bool = True,
    # ---- parameters of the same-period matching ----
    time_col: str = "CreateTime",
    tolerance: str = "1min",
    direction: str = "nearest",   # "nearest"/"backward"/"forward"
):
    """
    Compare the slippage of several order CSV files on time-matched samples only.

    Each file is read with pd.read_csv. Rows with Order2FilledPrice == 0 are
    dropped, slippage is computed as (Price / Order2FilledPrice - 1 - ESR)
    multiplied by +1 for Side == "sell" and -1 otherwise, duplicates of
    OrderID are removed, and rows outside the open interval
    (starttime, endtime) on `time_col` are discarded.

    Matching: the first file is the baseline. Every further file is attached
    with pd.merge_asof on the time column, grouped by Side, within
    `tolerance` and using `direction`. A row of a non-baseline file is used
    at most once (the baseline row closest in time keeps it); baseline rows
    that do not end up with a match in every file are dropped.

    Args:
        file_paths: list/tuple of at least two CSV paths.
        starttime, endtime: optional bounds (anything pd.to_datetime accepts);
            both are exclusive.
        labels: list/tuple of names, one per file; defaults to File1, File2, ...
        bins: number of histogram bins.
        xlim_quantile: (low, high) quantiles of the pooled matched slippage
            used as the common x-range, or None for no shared limit.
        show: call plt.show() when True.
        time_col: name of the timestamp column used for matching.
            NOTE(review): the default is "CreateTime" while calls in this
            notebook pass "Createtime" -- confirm the CSV header spelling.
        tolerance: maximum time distance for a match (pd.Timedelta string).
        direction: merge_asof direction.

    Returns:
        compare_table (pd.DataFrame): indexed by (side, metric) with one
            column per label; side is sell / buy / overall.
        fig (matplotlib.figure.Figure): N x 3 grid of histograms, one row
            per file, columns Sell / Buy / Overall.
        *dfs_matched (pd.DataFrame...): per file, the matched samples with
            columns _t, Side, slippage. _t is always the baseline time.

    Raises:
        ValueError: if file_paths / labels are malformed or a CSV lacks one
            of the required columns.
    """

    # ---------- argument validation ----------
    if not isinstance(file_paths, (list, tuple)) or len(file_paths) < 2:
        raise ValueError("file_paths 必须是 list/tuple 且长度>=2，例如 ['a.csv','b.csv'] 或 ['a','b','c']")
    n = len(file_paths)

    if labels is None:
        labels = [f"File{i+1}" for i in range(n)]
    if not isinstance(labels, (list, tuple)) or len(labels) != n:
        raise ValueError("labels 必须为 list/tuple 且长度与 file_paths 一致")

    def _to_ts(x):
        # Normalise an optional bound to pd.Timestamp (None stays None).
        if x is None:
            return None
        return x if isinstance(x, pd.Timestamp) else pd.to_datetime(x)

    # ---------- read a file, compute slippage, keep only the matching fields ----------
    def _read_and_calc(path: str) -> pd.DataFrame:
        df = pd.read_csv(path)

        # All of these must be present, even though only some are used below.
        required_cols = [
            "Order2FilledPrice", "Price", "ESR", "Side",
            "Order2Timestamp", "Timestamp",
            "AmountFilled", "AveragePrice", "OrderID",
            time_col,
        ]
        missing = [c for c in required_cols if c not in df.columns]
        if missing:
            raise ValueError(f"[{path}] 缺少必要列: {missing}")

        # Rows with a zero hedge fill price are excluded (they would divide by zero below).
        df = df[df["Order2FilledPrice"] != 0].copy()

        # Slippage = realised spread ratio minus ESR, sign-flipped for non-sell rows.
        df["SR"] = df["Price"] / df["Order2FilledPrice"] - 1
        df["slippage"] = df["SR"] - df["ESR"]
        df["sign"] = df["Side"].apply(lambda x: 1 if x == "sell" else -1)
        df["slippage"] = df["slippage"] * df["sign"]

        # Keep the first row per OrderID.
        df = df.drop_duplicates(subset=["OrderID"]).copy()

        # Time column used for the same-period matching; unparseable values are dropped.
        df["_t"] = pd.to_datetime(df[time_col], errors="coerce")
        df = df[df["_t"].notna()].copy()

        # Optional time window; both bounds are exclusive.
        st = _to_ts(starttime)
        et = _to_ts(endtime)
        if st is not None:
            df = df.loc[df["_t"] > st]
        if et is not None:
            df = df.loc[df["_t"] < et]

        df = df.loc[df["slippage"].notna(), ["_t", "Side", "slippage"]].copy()
        df = df.sort_values("_t")
        return df

    dfs_raw = [_read_and_calc(p) for p in file_paths]

    # ---------- same-period matching: baseline = first file, then merge_asof one file at a time (by=Side, within tolerance) ----------
    tol = pd.Timedelta(tolerance)

    # Give every row of every file a unique id so reuse of a right-hand row can be detected.
    dfs_raw = [df.copy() for df in dfs_raw]
    for k in range(n):
        dfs_raw[k][f"_id_{k}"] = np.arange(len(dfs_raw[k]))  # positional id; OrderID is not available here because _read_and_calc does not return it

    panel = dfs_raw[0].rename(columns={"slippage": labels[0]}).copy()
    panel["_base_id"] = np.arange(len(panel))  # id of the baseline row

    for i in range(1, n):
        right = dfs_raw[i].rename(columns={"slippage": labels[i]}).copy()
        # Carry the right-hand time column along so the time distance can be computed.
        right = right.rename(columns={"_t": f"_t_{i}"})

        panel = pd.merge_asof(
            panel.sort_values("_t"),
            right.sort_values(f"_t_{i}"),
            left_on="_t",
            right_on=f"_t_{i}",
            by="Side",
            tolerance=tol,
            direction=direction,
        )

        # Time distance of this merge; NaT for rows that found no match (they are removed by the final dropna).
        panel[f"dt_{i}"] = (panel["_t"] - panel[f"_t_{i}"]).abs()

        # Forbid reuse of a right-hand row: per right-hand id keep only the match with the smallest dt
        # (sort by dt ascending, then drop duplicates).
        panel = panel.sort_values(f"dt_{i}").drop_duplicates(subset=[f"_id_{i}"], keep="first")

        # Also make sure no baseline row appears twice (not expected, kept as a safeguard).
        panel = panel.drop_duplicates(subset=["_base_id"], keep="first")

        # Restore time order for the next merge_asof round.
        panel = panel.sort_values("_t")

    # Keep only the samples that were matched in every file.
    panel = panel.dropna(subset=list(labels)).reset_index(drop=True)





    # Split the panel back into one frame per file so the side getters below can be reused.
    dfs = []
    for lab in labels:
        dfi = panel[["_t", "Side", lab]].rename(columns={lab: "slippage"}).copy()
        dfs.append(dfi)

    # ---------- summary statistics ----------
    def _summarize(series: pd.Series, percentiles=(1, 5, 10, 25, 50, 75, 90, 95, 99)) -> dict:
        # count / mean / std / median / min / max plus the requested percentiles; NaN-filled when empty.
        arr = series.dropna().to_numpy()
        if len(arr) == 0:
            out = {"count": 0, "mean": np.nan, "std": np.nan, "median": np.nan, "min": np.nan, "max": np.nan}
            for p in percentiles:
                out[f"p{p}"] = np.nan
            return out

        out = {
            "count": int(len(arr)),
            "mean": float(np.mean(arr)),
            # np.std defaults to the population std (ddof=0); the plot titles below use Series.std (ddof=1).
            "std": float(np.std(arr)),
            "median": float(np.median(arr)),
            "min": float(np.min(arr)),
            "max": float(np.max(arr)),
        }
        for p in percentiles:
            out[f"p{p}"] = float(np.percentile(arr, p))
        return out

    # ---------- comparison table: (side, metric) rows, one column per label ----------
    sides = {
        "sell": lambda d: d[d["Side"] == "sell"]["slippage"],
        "buy":  lambda d: d[d["Side"] == "buy"]["slippage"],
        "overall": lambda d: d["slippage"],
    }

    rows = []
    for side_name, getter in sides.items():
        stats_list = [_summarize(getter(df)) for df in dfs]
        keys = stats_list[0].keys()
        for k in keys:
            rows.append((side_name, k, *[st[k] for st in stats_list]))

    summary_df = pd.DataFrame(rows, columns=["side", "metric", *labels]).set_index(["side", "metric"])


    compare_table = summary_df

    # ---------- plotting: N x 3 grid (one row per file: Sell/Buy/Overall) with a shared x-range ----------
    def _q_range_many(series_list, ql, qh):
        # Quantile range of the pooled values; (None, None) when empty or degenerate.
        x = pd.concat([s.dropna() for s in series_list], ignore_index=True)
        if len(x) == 0:
            return None, None
        lo = x.quantile(ql) if ql is not None else x.min()
        hi = x.quantile(qh) if qh is not None else x.max()
        if np.isfinite(lo) and np.isfinite(hi) and lo < hi:
            return float(lo), float(hi)
        return None, None

    ql, qh = xlim_quantile if xlim_quantile is not None else (None, None)
    x_lo, x_hi = _q_range_many([df["slippage"] for df in dfs], ql, qh)

    fig, axes = plt.subplots(n, 3, figsize=(22, 5 * n), sharex=True)
    # NOTE: unreachable with the n >= 2 check above; kept in case that check is relaxed.
    if n == 1:
        axes = np.array([axes])

    col_defs = [("sell", "Sell"), ("buy", "Buy"), ("overall", "Overall")]

    for r, (dff, lab) in enumerate(zip(dfs, labels)):
        for c, (side_key, side_title) in enumerate(col_defs):
            ax = axes[r, c]
            s = sides[side_key](dff).dropna()

            ax.hist(s, bins=bins, alpha=0.7)
            mu = s.mean() if len(s) else np.nan
            sd = s.std() if len(s) else np.nan
            if np.isfinite(mu):
                ax.axvline(mu, linestyle="--", label=f"mean={mu:.6g}")

            ax.set_title(f"{lab} - {side_title} (n={len(s)}, std={sd:.6g})")
            ax.set_xlabel("slippage")
            ax.set_ylabel("freq")

            # Only draw a legend when there is something to show (avoids the "No artists with labels..." warning).
            handles, leg_labels = ax.get_legend_handles_labels()
            if len(leg_labels) > 0:
                ax.legend()

            if x_lo is not None and x_hi is not None:
                ax.set_xlim(x_lo, x_hi)

    plt.tight_layout()
    if show:
        plt.show()

    return (compare_table, fig, *dfs)


In [None]:
pd.read_csv("/Users/rayxu/Downloads/order.btc_gate_binance_01_2 (1).csv")

In [None]:
compare_paths=["/Users/rayxu/Downloads/order.btc_gate_binance_01_2 (1).csv", "/Users/rayxu/Downloads/order.btc_gate_binance_03_2 (1).csv", "/Users/rayxu/Downloads/order.btc_gate_binance_02_2.csv"]
compare_labels = ["BenchMark",'SwitchHedingTime','queueSniping']

compare_table, fig, df_old, df_new, df = analyze_slippage_compare(
    file_paths=compare_paths,
    labels=compare_labels,
    time_col="Createtime",
    tolerance="1min",
    direction="nearest",
    starttime="2025-12-20 09:15:14",
    endtime="2025-12-24 05:00:00",
)
display(compare_table)


In [None]:
compare_paths=["/Users/rayxu/Downloads/order.btc_gate_binance_01_2 (1).csv", "/Users/rayxu/Downloads/order.btc_gate_binance_03_2 (1).csv"]
compare_labels = ["BenchMark",'SwitchHedingTime']

compare_table, fig, df_old, df_new = analyze_slippage_compare(
    file_paths=compare_paths,
    labels=compare_labels,
    time_col="Createtime",
    tolerance="2min",
    direction="nearest",
    starttime="2025-12-20 09:15:14",
    endtime="2025-12-24 05:00:00",
)
display(compare_table)


In [None]:
analyze_slippage("/Users/rayxu/Downloads/order.btc_gate_binance_02_2.csv")

In [None]:
panel

In [None]:
analyze_slippage("/Users/rayxu/Downloads/order.btc_gate_binance_04_2.csv",starttime="2025-12-26 07:45:14")

In [None]:
pd.read_csv("/Users/rayxu/Downloads/order.btc_gate_binance_04_2.csv")

In [None]:
df = df[df.Order2FilledPrice!=0]
df = df.drop_duplicates(subset=['OrderID'])

In [None]:
df = pd.read_csv("/Users/rayxu/Downloads/order.btc_gate_binance_04_2.csv")
# Rows with a zero hedge fill price are excluded.
df = df[df['Order2FilledPrice'] != 0]

# Slippage: realised spread ratio minus ESR, sign-flipped for non-sell rows.
_side_sign = df['Side'].apply(lambda side: 1 if side == 'sell' else -1)
df['SR'] = df['Price'] / df['Order2FilledPrice'] - 1
df['slippage'] = df['SR'] - df['ESR']
df['sign'] = _side_sign
df['slippage'] = df['slippage'] * df['sign']

# Seconds between Timestamp and Order2Timestamp; stored under both names.
_elapsed_seconds = (
    pd.to_datetime(df['Order2Timestamp']) - pd.to_datetime(df['Timestamp'])
).dt.total_seconds()
df['TimeUsed'] = _elapsed_seconds
df['HedgingTimeUsed'] = _elapsed_seconds

# Filled amount multiplied by the average fill price.
df['Amount'] = df['AmountFilled'] * df['AveragePrice']
df

In [None]:
df.drop_duplicates(subset=['OrderID'])

In [None]:
df['slippage'].mean()

In [None]:
df

In [None]:

compare_paths=["/Users/rayxu/Downloads/order.btc_gate_binance_02_2 (2).csv", "/Users/rayxu/Downloads/order.btc_gate_binance_03_2 (3).csv"]
compare_labels = ["QueueSniping",'BenchMark']
compare_table, fig, df1, df2= analyze_slippage_compare(
    file_paths=compare_paths,
    labels=compare_labels,
    starttime="2025-12-24 07:13:14",
    endtime="2025-12-27 05:00:00",
    bins=60,
    xlim_quantile=(0.01, 0.99),
)
display(compare_table)

In [None]:

# Three-way comparison on the gate/binance BTC order logs:
# QueueSniping vs. BenchMark vs. trendInterval.
compare_paths = [
    "/Users/rayxu/Downloads/order.btc_gate_binance_02_2 (2).csv",
    "/Users/rayxu/Downloads/order.btc_gate_binance_03_2 (3).csv",
    "/Users/rayxu/Downloads/order.btc_gate_binance_04_2.csv",
]
compare_labels = ["QueueSniping", "BenchMark", "trendInterval"]

# One frame is unpacked per input file, in the same order as compare_paths.
compare_table, fig, df1, df2, df3 = analyze_slippage_compare(
    file_paths=compare_paths,
    labels=compare_labels,
    starttime="2025-12-26 06:52:14",
    endtime="2025-12-27 05:00:00",
    bins=60,
    xlim_quantile=(0.01, 0.99),
)
display(compare_table)

In [None]:

# compare_paths=["/Users/rayxu/Downloads/order.btc_gate_binance_02_2 (2).csv", "/Users/rayxu/Downloads/order.btc_gate_binance_03_2 (3).csv" ,"/Users/rayxu/Downloads/order.btc_gate_binance_04_2.csv"]
# compare_labels = ["QueueSniping",'BenchMark','trendInterval']
# compare_table, fig, df1, df2, df3= analyze_slippage_compare(
#     file_paths=compare_paths,
#     labels=compare_labels,
#     starttime="2025-12-26 04:59:14",
#     endtime="2025-12-27 05:00:00",
#     bins=60,
#     xlim_quantile=(0.01, 0.99),
# )
# display(compare_table)





# Three-way comparison (QueueSniping vs. BenchMark vs. trendInterval), this time
# passing time_col / tolerance / direction instead of bins / xlim_quantile.
compare_paths = [
    "/Users/rayxu/Downloads/order.btc_gate_binance_02_2 (2).csv",
    "/Users/rayxu/Downloads/order.btc_gate_binance_03_2 (3).csv",
    "/Users/rayxu/Downloads/order.btc_gate_binance_04_2.csv",
]
compare_labels = ["QueueSniping", "BenchMark", "trendInterval"]

# NOTE(review): the three returned frames are bound to df_old, df_new and df
# here, so this cell rebinds the notebook-wide `df` and does not touch
# df1/df2/df3 from earlier cells.
compare_table, fig, df_old, df_new, df = analyze_slippage_compare(
    file_paths=compare_paths,
    labels=compare_labels,
    time_col="Createtime",
    tolerance="1min",
    direction="nearest",
    starttime="2025-12-26 04:59:14",
    endtime="2025-12-27 05:00:00",
)
display(compare_table)

In [None]:
# Summary statistics of Order2Timestamp - Order2CreateTime, in seconds, for df2.
(pd.to_datetime(df2['Order2Timestamp'])-pd.to_datetime(df2['Order2CreateTime'])).dt.total_seconds().describe()

In [None]:
# Summary statistics of Order2Timestamp - Order2CreateTime, in seconds, for df1.
(pd.to_datetime(df1['Order2Timestamp'])-pd.to_datetime(df1['Order2CreateTime'])).dt.total_seconds().describe()

In [None]:
# Per-row Order2Timestamp - Order2CreateTime, in seconds, for df1 (raw series, no summary).
(pd.to_datetime(df1['Order2Timestamp'])-pd.to_datetime(df1['Order2CreateTime'])).dt.total_seconds()

In [None]:
# Summary statistics of df2's HedgingTimeUsed_sec column.
# NOTE(review): the only visible assignment of this column is in a cell that
# appears further down the notebook; unless analyze_slippage_compare already
# adds it, that cell has to be run before this one.
df2['HedgingTimeUsed_sec'].describe()

In [None]:
# Add (or overwrite) HedgingTimeUsed_sec on df2 in place:
# seconds between Order2CreateTime and Order2Timestamp. Then echo the frame.
df2['HedgingTimeUsed_sec'] = (pd.to_datetime(df2['Order2Timestamp'])-pd.to_datetime(df2['Order2CreateTime'])).dt.total_seconds()
df2

In [None]:
# Slippage summary restricted to df2 rows whose HedgingTimeUsed_sec is at least 5.
df2[df2.HedgingTimeUsed_sec >= 5]['slippage'].describe()

In [None]:
# Per-row Order2Timestamp - Order2CreateTime for df2, left as a timedelta series
# (no conversion to seconds).
(pd.to_datetime(df2['Order2Timestamp'])-pd.to_datetime(df2['Order2CreateTime']))

# 20251230 抢盘口对比

In [None]:
# Fetch the order logs used by the comparisons below: three AM instances
# (01_2, 08_2, 09_2) plus the PS arbitrage instance 02_2.
# NOTE(review): the comparison cells read '<root>/<env>/<ver>/order.<suffix>.csv',
# which presumably is where download_dcdl_orders stores its output - confirm.
download_dcdl_orders(
    env="am_csv_s518",
    ver="20251227-am-UpdateAccountName-fixSaveEnabled",
    suffix=[
        "btc_okx_binance_01_2",
        "btc_okx_binance_08_2",
        "btc_okx_binance_09_2",
    ],
)
download_dcdl_orders(
    env="ps_csv_k480",
    ver="20251009-arbitrage-checkOverallLameLog",
    suffix=["arbitrage_btc_okx_binance_02_2"],
)


In [None]:
# Plain (unmodified) queue sniping vs. the PS benchmark.
file_paths = [
    '/Volumes/T7/Obentech/AMappData/am_csv_s518/20251227-am-UpdateAccountName-fixSaveEnabled/order.btc_okx_binance_01_2.csv',
    '/Volumes/T7/Obentech/AMappData/ps_csv_k480/20251009-arbitrage-checkOverallLameLog/order.arbitrage_btc_okx_binance_02_2.csv',
]
labels = ["Snip1", "BM"]

# Segment-by-segment comparison: one call per (start, end) window, in order.
# Every pass rebinds compare_table, fig, df1 and df2, so once the loop finishes
# those names hold the results for the last window only.
for _seg_start, _seg_end in (
    ("2025-12-29 17:27:00", "2025-12-29 17:40:00"),
    ("2025-12-29 20:14:00", "2025-12-29 21:14:00"),
    ("2025-12-30 12:10:00", "2025-12-30 17:23:00"),
    ("2025-12-30 22:30:00", "2025-12-31 00:00:00"),
):
    compare_table, fig, df1, df2 = analyze_slippage_compare(
        file_paths=file_paths,
        labels=labels,
        starttime=_seg_start,
        endtime=_seg_end,
    )


In [None]:
# Overall: a single comparison over one continuous window
# (2025-12-29 17:27:00 to 2025-12-31 17:40:00), using whatever file_paths and
# labels are currently bound in the notebook.

compare_table, fig, df1, df2 = analyze_slippage_compare(
    file_paths=file_paths,
    labels=labels,
    starttime="2025-12-29 17:27:00",
    endtime="2025-12-31 17:40:00",
)

## BN-OK 抢盘口优化版 vs 抢盘口朴素版 vs 抢盘口+动态对冲 vs 基准 

### BTC

In [None]:
# Fetch the order logs for the four-way BTC comparison: three AM instances
# (01_2, 08_2, 09_2) plus the PS arbitrage instance 02_2.
download_dcdl_orders(
    env="am_csv_s518",
    ver="20251227-am-UpdateAccountName-fixSaveEnabled",
    suffix=[
        "btc_okx_binance_01_2",
        "btc_okx_binance_08_2",
        "btc_okx_binance_09_2",
    ],
)
download_dcdl_orders(
    env="ps_csv_k480",
    ver="20251009-arbitrage-checkOverallLameLog",
    suffix=["arbitrage_btc_okx_binance_02_2"],
)


In [None]:
# Four-way BTC comparison over a single window:
# Snip1 vs. Snip2 vs. Snip2+DynamicHedgingTime vs. BM.
file_paths = [
    '/Volumes/T7/Obentech/AMappData/am_csv_s518/20251227-am-UpdateAccountName-fixSaveEnabled/order.btc_okx_binance_01_2.csv',
    '/Volumes/T7/Obentech/AMappData/am_csv_s518/20251227-am-UpdateAccountName-fixSaveEnabled/order.btc_okx_binance_08_2.csv',
    '/Volumes/T7/Obentech/AMappData/am_csv_s518/20251227-am-UpdateAccountName-fixSaveEnabled/order.btc_okx_binance_09_2.csv',
    '/Volumes/T7/Obentech/AMappData/ps_csv_k480/20251009-arbitrage-checkOverallLameLog/order.arbitrage_btc_okx_binance_02_2.csv',
]
labels = ["Snip1", "Snip2", "Snip2+DynamicHedgingTime", "BM"]

# One frame is unpacked per input file, in the same order as file_paths.
compare_table, fig, df1, df2, df3, df4 = analyze_slippage_compare(
    file_paths=file_paths,
    labels=labels,
    starttime="2025-12-30 22:30:00",
    endtime="2025-12-31 17:40:00",
)




In [None]:
# Second round: request the 08_2 and 09_2 order logs under the
# 20251231-am-fixCancel version.
download_dcdl_orders(env="am_csv_s518",ver="20251231-am-fixCancel",suffix=["btc_okx_binance_08_2",'btc_okx_binance_09_2'])




In [None]:
# Second-round BTC comparison: Snip2 vs. Snip2+DynamicHedgingTime,
# both from the 20251231-am-fixCancel version.
file_paths = [
    '/Volumes/T7/Obentech/AMappData/am_csv_s518/20251231-am-fixCancel/order.btc_okx_binance_08_2.csv',
    '/Volumes/T7/Obentech/AMappData/am_csv_s518/20251231-am-fixCancel/order.btc_okx_binance_09_2.csv',
]
labels = ["Snip2", "Snip2+DynamicHedgingTime"]

compare_table, fig, df1, df2 = analyze_slippage_compare(
    file_paths=file_paths,
    labels=labels,
    starttime="2026-01-01 03:46:00",
    endtime="2026-01-06 17:40:00",
)

### ETH

In [None]:
# Request the ETH okx/binance 08_2 order log (env am_csv_s518, version 20251231-am-fixCancel).
download_dcdl_orders(env="am_csv_s518",ver="20251231-am-fixCancel",suffix=["eth_okx_binance_08_2"])

In [None]:
# Request the PS arbitrage ETH okx/binance 09_2 order log
# (env ps_csv_k480, version 20251117-arbitrage-logOnOrderLag).
download_dcdl_orders(env="ps_csv_k480",ver="20251117-arbitrage-logOnOrderLag",suffix=["arbitrage_eth_okx_binance_09_2"])

In [None]:
# Request the PS arbitrage ETH 02_2 order log, then compare it against the
# PS arbitrage ETH 09_2 log (labelled "09" and "02" respectively).
download_dcdl_orders(
    env="ps_csv_k480",
    ver="20260121-arbitrage-fixNegativeIsOrderBigEnough",
    suffix=["arbitrage_eth_okx_binance_02_2"],
)
file_paths = [
    '/Volumes/T7/Obentech/AMappData/ps_csv_k480/20251117-arbitrage-logOnOrderLag/order.arbitrage_eth_okx_binance_09_2.csv',
    '/Volumes/T7/Obentech/AMappData/ps_csv_k480/20260121-arbitrage-fixNegativeIsOrderBigEnough/order.arbitrage_eth_okx_binance_02_2.csv',
]
labels = ["09", "02"]

compare_table, fig, df1, df2 = analyze_slippage_compare(
    file_paths=file_paths,
    labels=labels,
    starttime='2026-02-04 20:00:00',
    endtime='2026-02-05 05:00:00',
)

In [None]:
# Echo the most recently bound compare_table.
compare_table

In [None]:
# Request the PS arbitrage ETH okx/binance 01_2 order log (env ps_csv_k480).
download_dcdl_orders(env="ps_csv_k480",ver="20251102-arbitrage-handleOrdrNotExist-fixCheckOverallLameDueToPrecision",suffix=["arbitrage_eth_okx_binance_01_2"])

In [None]:
# Load the ETH okx/binance 08_2 order log and keep an independent copy of the
# rows whose Order2FilledPrice is non-zero.
df = pd.read_csv('/Volumes/T7/Obentech/AMappData/am_csv_s518/20251231-am-fixCancel/order.eth_okx_binance_08_2.csv')
df = df.loc[df["Order2FilledPrice"] != 0].copy()

# SR = Price / Order2FilledPrice - 1; slippage starts as SR - ESR.
df["SR"] = df["Price"].div(df["Order2FilledPrice"]).sub(1)
df["slippage"] = df["SR"].sub(df["ESR"])

# +1 for 'sell' rows, -1 for every other Side value.
df["sign"] = df["Side"].apply(lambda side: 1 if side == "sell" else -1)

# Apply the sign, then turn +/-inf into NaN so those rows can be dropped.
df["slippage"] = df["slippage"].mul(df["sign"]).replace([np.inf, -np.inf], np.nan)

# Drop rows based on NaN in the slippage column only.
df = df.dropna(subset=["slippage"])

# Mean slippage for rows whose Createtime lies strictly inside the window.
# The bounds are compared against the column exactly as loaded from the CSV.
_in_window = (df.Createtime > "2026-02-02 06:21:00") & (df.Createtime < "2026-02-03 06:21:00")
df.loc[_in_window, "slippage"].mean()

In [None]:
# Window bounds kept as notebook globals.
# NOTE(review): the comparison cells visible in this section pass literal
# starttime/endtime strings and do not reference these two names.
start_date = '2026-02-04 20:00:00'
end_date = '2026-02-05 05:00:00'

In [None]:
# ETH comparison: AM 08_2 (labelled "Snip1") vs. PS arbitrage 09_2 (labelled "BM").
file_paths = [
    '/Volumes/T7/Obentech/AMappData/am_csv_s518/20251231-am-fixCancel/order.eth_okx_binance_08_2.csv',
    '/Volumes/T7/Obentech/AMappData/ps_csv_k480/20251117-arbitrage-logOnOrderLag/order.arbitrage_eth_okx_binance_09_2.csv',
]
labels = ["Snip1", "BM"]

compare_table, fig, df1, df2 = analyze_slippage_compare(
    file_paths=file_paths,
    labels=labels,
    starttime='2026-02-04 05:00:00',
    endtime='2026-02-05 05:00:00',
)

In [None]:
# Echo the most recently bound compare_table.
compare_table

In [None]:
# Rows of df1 whose sign column equals 1.
df1[df1.sign==1]

In [None]:
# Print "mean ESR, mean SR" once per (frame, sign) pair, in the order
# df1/sign=1, df1/sign=-1, df2/sign=1, df2/sign=-1.
for _frame in (df1, df2):
    for _sign_value in (1, -1):
        _subset = _frame[_frame.sign == _sign_value]
        print(_subset['ESR'].mean(), _subset['SR'].mean())


In [None]:
# Plain queue sniping with adjusted order size vs. the PS benchmark (ETH).
file_paths = [
    '/Volumes/T7/Obentech/AMappData/am_csv_s518/20251231-am-fixCancel/order.eth_okx_binance_08_2.csv',
    '/Volumes/T7/Obentech/AMappData/ps_csv_k480/20251117-arbitrage-logOnOrderLag/order.arbitrage_eth_okx_binance_09_2.csv',
]
labels = ["Snip1", "BM"]

# Segment: a single window from 2026-01-05 09:44:00 to 2026-01-06 13:30:00.
compare_table, fig, df1, df2 = analyze_slippage_compare(
    file_paths=file_paths,
    labels=labels,
    starttime="2026-01-05 09:44:00",
    endtime="2026-01-06 13:30:00",
)




# BN-Gate


In [None]:
# Request the BTC gate/binance 02_2, 03_2 and 04_2 order logs
# (env am_csv_k480, version 20251226-am-trendInterval).
download_dcdl_orders(env="am_csv_k480",ver="20251226-am-trendInterval",suffix=["btc_gate_binance_02_2","btc_gate_binance_03_2","btc_gate_binance_04_2"])

In [None]:
# Request the BTC gate/binance 05_2 order log (env am_csv_k480, version 20251217-am-randomSize).
download_dcdl_orders(env="am_csv_k480",ver="20251217-am-randomSize",suffix=["btc_gate_binance_05_2"])

In [None]:
# Four-way BTC gate/binance comparison. Labels map to files positionally:
# 02_2 -> QueueSnipingV1, 03_2 -> BM+BugFix, 04_2 -> QueueSnipingV2, 05_2 -> BM+Bug.
file_paths = [
    '/Volumes/T7/Obentech/AMappData/am_csv_k480/20251226-am-trendInterval/order.btc_gate_binance_02_2.csv',
    '/Volumes/T7/Obentech/AMappData/am_csv_k480/20251226-am-trendInterval/order.btc_gate_binance_03_2.csv',
    '/Volumes/T7/Obentech/AMappData/am_csv_k480/20251226-am-trendInterval/order.btc_gate_binance_04_2.csv',
    '/Volumes/T7/Obentech/AMappData/am_csv_k480/20251217-am-randomSize/order.btc_gate_binance_05_2.csv',
]
labels = ["QueueSnipingV1", "BM+BugFix", "QueueSnipingV2", "BM+Bug"]

compare_table, fig, df1, df2, df3, df4 = analyze_slippage_compare(
    file_paths=file_paths,
    labels=labels,
    starttime="2025-12-29 17:27:00",
    endtime="2025-12-31 08:00:00",
)
display(compare_table)

# 抢盘口应用小币

In [None]:
# Request the PS arbitrage ASTER okx/binance 02_2 order log (env ps_csv_k480).
download_dcdl_orders(env="ps_csv_k480",ver="20251017-arbitrage-RiskMode-fixArbitrageLame-fixOverallLame",suffix=["arbitrage_aster_okx_binance_02_2"])

In [None]:
# Request the AM ASTER okx/binance 09_2 order log (env am_csv_s518, version 20251231-am-fixCancel).
download_dcdl_orders(env="am_csv_s518",ver="20251231-am-fixCancel",suffix=["aster_okx_binance_09_2"])

In [None]:
# Window bounds kept as notebook globals; the commented-out line is an
# alternative start that is currently disabled.
# NOTE(review): the comparison cells visible below pass literal
# starttime/endtime strings and do not reference these two names.
start_date = '2026-01-13 15:00:00'
# start_date = '2025-12-11 15:30:00'
end_date = '2026-01-15 15:00:00'

In [None]:
# Echo whatever df is currently bound to.
df

In [None]:
# ASTER comparison: PS arbitrage 02_2 ("BM") vs. AM 09_2 ("QueueSnipingV2"),
# window 2026-01-07 15:00:00 to 2026-01-15 15:00:00.
file_paths = [
    '/Volumes/T7/Obentech/AMappData/ps_csv_k480/20251017-arbitrage-RiskMode-fixArbitrageLame-fixOverallLame/order.arbitrage_aster_okx_binance_02_2.csv',
    '/Volumes/T7/Obentech/AMappData/am_csv_s518/20251231-am-fixCancel/order.aster_okx_binance_09_2.csv',
]
labels = ["BM", "QueueSnipingV2"]

compare_table, fig, df1, df2 = analyze_slippage_compare(
    file_paths=file_paths,
    labels=labels,
    starttime='2026-01-07 15:00:00',
    endtime='2026-01-15 15:00:00',
)
display(compare_table)

In [None]:
# ASTER comparison: PS arbitrage 02_2 ("BM") vs. AM 09_2 ("QueueSnipingV2"),
# window 2026-01-01 06:57:00 to 2026-01-07 08:00:00.
file_paths = [
    '/Volumes/T7/Obentech/AMappData/ps_csv_k480/20251017-arbitrage-RiskMode-fixArbitrageLame-fixOverallLame/order.arbitrage_aster_okx_binance_02_2.csv',
    '/Volumes/T7/Obentech/AMappData/am_csv_s518/20251231-am-fixCancel/order.aster_okx_binance_09_2.csv',
]
labels = ["BM", "QueueSnipingV2"]

compare_table, fig, df1, df2 = analyze_slippage_compare(
    file_paths=file_paths,
    labels=labels,
    starttime="2026-01-01 06:57:00",
    endtime="2026-01-07 08:00:00",
)
display(compare_table)

In [None]:


# ASTER comparison: PS arbitrage 02_2 ("BM") vs. AM 09_2 ("QueueSnipingV2"),
# window 2026-01-07 06:20:00 to 2026-01-08 12:00:00.
file_paths = [
    '/Volumes/T7/Obentech/AMappData/ps_csv_k480/20251017-arbitrage-RiskMode-fixArbitrageLame-fixOverallLame/order.arbitrage_aster_okx_binance_02_2.csv',
    '/Volumes/T7/Obentech/AMappData/am_csv_s518/20251231-am-fixCancel/order.aster_okx_binance_09_2.csv',
]
labels = ["BM", "QueueSnipingV2"]

compare_table, fig, df1, df2 = analyze_slippage_compare(
    file_paths=file_paths,
    labels=labels,
    starttime='2026-01-07 06:20:00',
    endtime="2026-01-08 12:00:00",
)
display(compare_table)

In [None]:
# Mean SR in df1, grouped by the sign column.
df1.groupby('sign')['SR'].mean()

In [None]:
# Mean ESR over df1 rows with sign == -1.
df1[df1['sign']==-1]['ESR'].mean()

In [None]:
# Mean SR over df1 rows with sign == -1.
df1[df1['sign']==-1]['SR'].mean()

In [None]:
# Mean ESR over df1 rows with sign == 1.
df1[df1['sign']==1]['ESR'].mean()

In [None]:
# Mean SR over df1 rows with sign == 1.
df1[df1['sign']==1]['SR'].mean()

## 0114 进一步敞口

0210 对比正负万1

In [None]:
# Request the ETH binance/okx 03_2 order log
# (env am_csv_k480, version 20260124-instantOrder-maxDelay).
download_dcdl_orders(env="am_csv_k480",ver="20260124-instantOrder-maxDelay",suffix=["eth_binance_okx_03_2"])

In [None]:
# Run analyze_slippage on the 03_2 log and keep only the fourth element of the
# returned 4-tuple, rebinding the notebook-wide `df`; the first three are discarded.
_,_,_,df = analyze_slippage('/Volumes/T7/Obentech/AMappData/am_csv_k480/20260124-instantOrder-maxDelay/order.eth_binance_okx_03_2.csv')

In [None]:
# Slippage values for rows whose ESR lies strictly between -0.00005 and 0.00005.
# NOTE(review): the markdown heading above this section mentions plus/minus
# 1e-4, while this band is plus/minus 5e-5 - confirm the intended threshold.
df[(df['ESR']<0.00005)&(df['ESR']>-0.00005)]['slippage']

In [None]:
# Full rows of df whose ESR lies strictly between -0.00005 and 0.00005.
df[(df['ESR']<0.00005)&(df['ESR']>-0.00005)]