In [9]:
import concurrent.futures
import logging
import traceback
import gc
import connectorx as cx
import pandas as pd
import numpy as np
import os
import duckdb
from datetime import datetime, timedelta
import glob
import multiprocessing as mp
from functools import partial
import time

In [None]:
# 设置日志

def calculate_minute_level_apb(trading_date,df_adj):
    """计算分钟级APB因子"""

    trading_date = pd.to_datetime(trading_date)

    date_str = trading_date.strftime('%Y%m%d')
    
    order_pth = f"/data/HighFreqData/Order/l2order/{date_str}.parquet"
    
    # 确保输出目录存在
    output_dir = "./factors/1200_minute_level_APB"
    os.makedirs(output_dir, exist_ok=True)

    output_file = f"{output_dir}/{date_str}.parquet"
    
    if not os.path.exists(order_pth):
        return
        # raise FileNotFoundError(f"订单数据文件不存在: {order_pth}")
    
    # 如果该日期的文件已存在，跳过处理
    if os.path.exists(output_file):
        # print(f"日期 {date_str} 已处理，跳过")
        return

    conn = duckdb.connect(database=':memory:')

    # 1. 读取毫秒级订单簿数据并转换为分钟级
    query_order = f"""
        WITH buy_orders_with_time AS(
            SELECT 
                security_code,
                order_side, 
                order_type,
                order_details,
                order_price,
                order_price_adj,
                order_volume,
                order_time,
                ('{date_str}'::DATE + MAKE_TIME(
                    FLOOR(order_time / 10000000)::int,
                    (FLOOR(order_time / 100000) % 100)::int,
                    (FLOOR(order_time / 1000) % 100)::numeric + 
                    (order_time % 1000)::numeric / 1000)
                )::TIMESTAMP AS formatted_time
            FROM '{order_pth}'
            WHERE order_side = 1
                AND order_type = 'A'
                AND order_details = 'L'
                AND order_price > 0
                AND order_volume > 0
                AND order_time >= 93000000
                AND order_time < 145700000
        )
        SELECT
            security_code,
            order_price,
            order_volume,
            '{date_str}' AS date,
            DATE_TRUNC('minute', formatted_time) AS order_minute
        FROM buy_orders_with_time
        """

    # 将数据加载到临时表中
    conn.execute(f"CREATE TEMPORARY TABLE minute_order AS {query_order}")
   
    # 2. 计算每分钟的VWAP和TWAP
    minute_apb_query = """
        WITH minute_level_stats AS (
            -- 直接计算每分钟的VWAP和TWAP
            SELECT 
                date,
                security_code,
                order_minute,
                SUM(order_volume * order_price) / SUM(order_volume) AS vwap,
                AVG(order_price) AS twap,
                SUM(order_volume) AS total_volume
            FROM minute_order
            GROUP BY security_code, order_minute
        )
        -- 计算APB因子
        SELECT 
            '{date_str}' AS date_str,
            security_code,
            order_minute,
            vwap,
            twap,
            CASE 
                WHEN twap > 0 AND vwap > 0 THEN LN(twap / vwap)
                ELSE NULL
            END AS apb,
            (twap - vwap) / vwap AS apb_alt,
            total_volume
        FROM minute_level_stats
        WHERE total_volume > 0
        ORDER BY security_code
    """

    # 3. 保存分钟级数据
    minute_apb_df = conn.execute(minute_apb_query).fetchdf()
    
    # 保存分钟级数据
    minute_apb_df.to_parquet(output_file)
    
    return minute_apb_df

def process_all_trading_days(trading_dates):
    """处理所有交易日的数据"""
    for date in trading_dates:
        try:
            calculate_minute_level_apb(date)
            # print(f"成功处理日期: {date}")
        except Exception as e:
            print(f"处理日期 {date} 时出错: {str(e)}")



In [None]:
def get_adjust_price(df_close):
    df_adj_spec = df_adj.loc[df_adj.security_code == df_close['security_code'].iloc[0]].sort_values(by='ExDiviDate', ascending=False)
    if df_adj_spec.shape[0] == 0:
        return df_close
    df_close['IsReadjusted'] = False
    df_close['adj_twap'] = df_close['twap']
    df_close['adj_vwap'] = df_close['vwap']
    cols2adj = ['adj_twap', 'adj_vwap']
    for d, f in df_adj_spec[['ExDiviDate', 'AdjustingFactor']].values:
        mask = (df_close.order_minute >= d) & ~df_close.IsReadjusted
        df_close.loc[mask, cols2adj] = df_close.loc[mask, cols2adj] * f
        df_close.loc[mask, 'IsReadjusted'] = True
    del df_close['IsReadjusted']
    return df_close
        

In [1]:
import os
import glob
import pandas as pd
import multiprocessing
from tqdm import tqdm
import time
from datetime import datetime

def get_adjust_price(df_close, adj_factors):
    """
    使用复权因子调整价格
    
    参数:
    df_close: 包含价格数据的DataFrame
    adj_factors: 当日对应的复权因子DataFrame
    
    返回:
    调整后的价格DataFrame
    """
    security_code = df_close['security_code'].iloc[0]
    # 从复权因子表中查找对应的股票
    df_adj_spec = adj_factors[adj_factors.security_code == security_code]
    
    if df_adj_spec.shape[0] == 0:
        return df_close
    
    df_close['IsReadjusted'] = False
    df_close['adj_twap'] = df_close['twap']
    df_close['adj_vwap'] = df_close['vwap']
    cols2adj = ['adj_twap', 'adj_vwap']
    
    # 按照复权日期排序
    df_adj_spec = df_adj_spec.sort_values(by='ExDiviDate', ascending=False)
    
    # 应用复权因子
    for d, f in df_adj_spec[['ExDiviDate', 'AdjustingFactor']].values:
        mask = (df_close.order_minute >= d) & ~df_close.IsReadjusted
        df_close.loc[mask, cols2adj] = df_close.loc[mask, cols2adj] * f
        df_close.loc[mask, 'IsReadjusted'] = True
    
    del df_close['IsReadjusted']
    return df_close

def process_file(file_path):
    """处理单个文件的函数"""
    try:
        # 检查是否已处理过
        file_name = os.path.basename(file_path)
        marker_file = os.path.join(os.path.dirname(file_path), ".processed", file_name + ".done")
        
        # 如果标记文件存在，说明已处理过
        if os.path.exists(marker_file):
            return {"file": file_path, "status": "skipped", "message": "Already processed"}
        
        # 读取数据
        daily_data = pd.read_parquet(file_path)
        
        # 检查数据内容是否已经处理过
        if 'adj_twap' in daily_data.columns:
            return {"file": file_path, "status": "skipped", "message": "Data already has adjusted columns"}
        
        # 获取日期信息
        if isinstance(daily_data['order_minute'].iloc[0], str):
            # 如果order_minute已经是字符串格式，尝试从中提取日期
            date_str = daily_data['date_str'].iloc[0] if 'date_str' in daily_data.columns else daily_data['order_minute'].iloc[0].split(' ')[0].replace('-', '')
        else:
            # 如果order_minute是datetime格式，直接获取日期
            date_str = daily_data['order_minute'].iloc[0].strftime('%Y%m%d')
        
        # 加载对应日期的复权因子
        adj_factor_path = f"./Daily_Adjusting_Factor/{date_str}.parquet"
        
        if not os.path.exists(adj_factor_path):
            return {"file": file_path, "status": "error", "message": f"Adjustment factor file not found for date {date_str}"}
        
        adj_factors = pd.read_parquet(adj_factor_path)
        
        # 处理数据
        daily_adjust = daily_data.groupby('security_code', as_index=False).apply(
            lambda x: get_adjust_price(x, adj_factors)
        ).reset_index(drop=True)
        
        # 转换日期格式
        if not isinstance(daily_adjust['order_minute'].iloc[0], str):
            daily_adjust['date_str'] = daily_adjust['order_minute'].dt.strftime('%Y%m%d')
            daily_adjust['order_minute'] = daily_adjust['order_minute'].dt.strftime('%H:%M')
        
        # 保存处理后的数据
        daily_adjust.to_parquet(file_path, index=False)
        
        # 创建处理完成标记
        os.makedirs(os.path.dirname(marker_file), exist_ok=True)
        with open(marker_file, 'w') as f:
            f.write(f"Processed at {time.strftime('%Y-%m-%d %H:%M:%S')}")
        
        return {"file": file_path, "status": "success", "message": "Processed successfully"}
    
    except Exception as e:
        return {"file": file_path, "status": "error", "message": str(e)}


def Adjust_price(num_processes=None):
    """使用多进程调整价格数据"""
    # 设置目录
    data_dir = "./factors/minute_level_APB"
    
    # 创建处理标记目录
    processed_dir = os.path.join(data_dir, ".processed")
    os.makedirs(processed_dir, exist_ok=True)
    
    # 获取所有parquet文件
    parquet_files = sorted(glob.glob(os.path.join(data_dir, "*.parquet")))
    
    if not parquet_files:
        print(f"没有找到parquet文件在目录: {data_dir}")
        return
    
    print(f"找到 {len(parquet_files)} 个文件需要处理")
    
    # 检查复权因子目录是否存在
    adj_factor_dir = "./Daily_Adjusting_Factor"
    if not os.path.exists(adj_factor_dir):
        print(f"错误: 复权因子目录不存在: {adj_factor_dir}")
        return
    
    # 设置进程数
    if num_processes is None:
        num_processes = min(20, multiprocessing.cpu_count())
    
    # 创建进程池
    pool = multiprocessing.Pool(processes=num_processes)
    
    try:
        # 使用tqdm显示进度
        results = []
        for result in tqdm(pool.imap_unordered(process_file, parquet_files), 
                          total=len(parquet_files), 
                          desc="处理文件"):
            results.append(result)
        
        # 关闭进程池
        pool.close()
        pool.join()
        
        # 统计处理结果
        success_count = sum(1 for r in results if r["status"] == "success")
        skipped_count = sum(1 for r in results if r["status"] == "skipped")
        error_count = sum(1 for r in results if r["status"] == "error")
        
        print(f"处理完成: 成功 {success_count}, 跳过 {skipped_count}, 错误 {error_count}")
        
        # 如果有错误，打印错误详情
        if error_count > 0:
            print("\n错误详情:")
            for r in results:
                if r["status"] == "error":
                    print(f"  {r['file']}: {r['message']}")
        
    except KeyboardInterrupt:
        print("\n处理被中断，正在清理资源...")
        pool.terminate()
        pool.join()
    except Exception as e:
        print(f"发生错误: {str(e)}")
        pool.terminate()
        pool.join()

# 使用示例
if __name__ == "__main__":
    Adjust_price(num_processes=20)


找到 854 个文件需要处理


  daily_adjust = daily_data.groupby('security_code', as_index=False).apply(
  daily_adjust = daily_data.groupby('security_code', as_index=False).apply(
  daily_adjust = daily_data.groupby('security_code', as_index=False).apply(
  daily_adjust = daily_data.groupby('security_code', as_index=False).apply(
  daily_adjust = daily_data.groupby('security_code', as_index=False).apply(
  daily_adjust = daily_data.groupby('security_code', as_index=False).apply(
  daily_adjust = daily_data.groupby('security_code', as_index=False).apply(
  daily_adjust = daily_data.groupby('security_code', as_index=False).apply(
  daily_adjust = daily_data.groupby('security_code', as_index=False).apply(
  daily_adjust = daily_data.groupby('security_code', as_index=False).apply(
  daily_adjust = daily_data.groupby('security_code', as_index=False).apply(
  daily_adjust = daily_data.groupby('security_code', as_index=False).apply(
  daily_adjust = daily_data.groupby('security_code', as_index=False).apply(
  daily_adju

处理完成: 成功 854, 跳过 0, 错误 0


In [17]:
import os
import glob
import pandas as pd
import numpy as np
import logging
import traceback
import multiprocessing
from functools import partial
import gc

# 假设logger已经在其他地方定义
logger = logging.getLogger(__name__)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)


def process_single_date(file_dates, i, window_size=1200, daily_min=400, output_dir_min="./factors/minute_level_APB/1200_Rolling_Min", output_dir_day="./factors/minute_level_APB/1200_Rolling_Day"):
    """处理单个日期的数据"""
    current_date, current_file = file_dates[i]
    logger.info(f"处理日期 {current_date} 的数据")
    
    # 检查输出文件是否已存在
    output_file_min = os.path.join(output_dir_min, f"{current_date}.parquet")
    output_file = os.path.join(output_dir_day, f"{current_date}.parquet")
    
    if os.path.exists(output_file):
        logger.info(f"日期 {current_date} 的结果文件已存在，跳过处理")
        return current_date, True
    
    try:
        # 获取当前日期和前5天的文件
        start_idx = max(0, i-6)  # 确保不会索引到负数
        recent_files = [path for _, path in file_dates[start_idx:i+1]]
        
        # 读取这些天的数据
        dfs = []
        for file_path in recent_files:
            try:
                df = pd.read_parquet(file_path)
                dfs.append(df)
            except Exception as e:
                logger.error(f"读取文件 {file_path} 时出错: {str(e)}")
                continue
        
        if not dfs:
            logger.warning(f"日期 {current_date} 没有有效数据，跳过")
            return current_date, False
        
        # 合并数据
        combined_data = pd.concat(dfs, ignore_index=True)
        
        # 只处理当前日期的结果
        # 使用透视表方式计算滚动平均
        # 先创建一个唯一的时间索引，确保数据按时间顺序排列
        combined_data.sort_values(['security_code', 'date_str', 'order_minute'], inplace=True)
        combined_data['time_idx'] = combined_data.groupby('security_code').cumcount()

        combined_data['total_value'] = combined_data['adj_vwap'] * combined_data['total_volume']
        
        # 为每个股票创建透视表
        pivot_data_twap = combined_data.pivot_table(
            index='time_idx', 
            columns='security_code', 
            values='adj_twap',
            aggfunc='first'  # 如果有重复的time_idx，取第一个值
        )

        pivot_data_volume = combined_data.pivot_table(
            index='time_idx', 
            columns='security_code', 
            values='total_volume',
            aggfunc='first'  # 如果有重复的time_idx，取第一个值
        )

        pivot_data_value = combined_data.pivot_table(
            index='time_idx', 
            columns='security_code', 
            values='total_value',
            aggfunc='first'  # 如果有重复的time_idx，取第一个值
        )
        
        # 计算滚动平均
        rolling_avg_twap = pivot_data_twap.rolling(window=window_size, min_periods=daily_min).mean()
        rolling_sum_volume = pivot_data_volume.rolling(window=window_size, min_periods=daily_min).sum()
        rolling_sum_value = pivot_data_value.rolling(window=window_size, min_periods=daily_min).sum()

        # 将结果转回长格式
        rolling_result_twap = rolling_avg_twap.stack().reset_index()
        rolling_result_twap.columns = ['time_idx', 'security_code', 'rolling_twap']
        
        rolling_result_value = rolling_sum_value.stack().reset_index()
        rolling_result_value.columns = ['time_idx', 'security_code', 'rolling_value']
        
        rolling_result_volume = rolling_sum_volume.stack().reset_index()
        rolling_result_volume.columns = ['time_idx', 'security_code', 'rolling_volume']
        
        # 首先合并两个DataFrame
        rolling_result = pd.merge(
            rolling_result_twap,
            rolling_result_value,
            on=['security_code', 'time_idx'],
            how='inner'
        )
        
        # 然后合并第三个DataFrame
        rolling_result = pd.merge(
            rolling_result,
            rolling_result_volume,
            on=['security_code', 'time_idx'],
            how='inner'
        )

        # 与原始数据合并，获取日期信息
        rolling_result = pd.merge(
            rolling_result,
            combined_data[['security_code', 'time_idx', 'date_str', 'order_minute']],
            on=['security_code', 'time_idx'],
            how='left'
        )
        
        # 只保留当前日期的数据
        current_date_result = rolling_result[rolling_result['date_str'] == current_date].copy()

        # 计算rolling_apb
        current_date_result['rolling_apb'] = np.log(current_date_result['rolling_twap'] * 
                                                current_date_result['rolling_volume'] / 
                                                current_date_result['rolling_value'])
        
        current_date_result['rolling_apb'].replace([np.inf, -np.inf], np.nan, inplace=True)

        current_date_result = current_date_result[['date_str','order_minute','security_code','rolling_apb','rolling_twap','rolling_volume','rolling_value']]
        
        # 确保输出目录存在
        os.makedirs(output_dir_min, exist_ok=True)
        current_date_result.to_parquet(output_file_min)

        # 计算每日平均值
        daily_factor = current_date_result.groupby(['date_str', 'security_code'])['rolling_apb'].mean().reset_index()
        daily_factor.columns = ['date', 'security_code', 'rolling_apb']
        
        # 确保输出目录存在
        os.makedirs(output_dir_day, exist_ok=True)
        # 保存当天的结果到parquet文件
        daily_factor.to_parquet(output_file)
        logger.info(f"已保存日期 {current_date} 的结果到 {output_file}")
        
        # 清理内存
        del combined_data, pivot_data_twap, pivot_data_volume, pivot_data_value
        del rolling_avg_twap, rolling_sum_volume, rolling_sum_value
        del rolling_result_twap, rolling_result_value, rolling_result_volume
        del rolling_result, current_date_result, daily_factor, dfs
        gc.collect()
        
        return current_date, True
        
    except Exception as e:
        logger.error(f"处理日期 {current_date} 时发生错误: {str(e)}")
        logger.error(traceback.format_exc())
        return current_date, False



def calculate_rolling_average(window_size=1200, daily_min=400, start_date=None, end_date=None, num_processes=15):
    """计算滚动平均APB因子，采用多进程处理方式"""
    data_dir = "./factors/minute_level_APB"  
    parquet_files = sorted(glob.glob(os.path.join(data_dir, "*.parquet")))

    if not parquet_files:
        raise ValueError("未找到分钟级数据文件")

    logger.info(f"找到 {len(parquet_files)} 个分钟级数据文件")
    
    # 假设文件名是日期格式，如 "20210602.parquet"
    # 提取文件名中的日期并排序
    file_dates = []
    for file_path in parquet_files:
        file_name = os.path.basename(file_path)
        date_str = file_name.split('.')[0]  # 假设文件名格式为 "YYYYMMDD.parquet"
        file_dates.append((date_str, file_path))
    
    # 按日期排序
    file_dates.sort(key=lambda x: x[0])
    
    # 如果指定了日期范围，筛选文件
    if start_date and end_date:
        file_dates = [(date, path) for date, path in file_dates 
                      if start_date <= date <= end_date]
    
    if not file_dates:
        raise ValueError("筛选后没有符合条件的数据文件")
    
    output_dir_min = "./factors/minute_level_APB/1200_Rolling_Min"
    output_dir_day = "./factors/minute_level_APB/1200_Rolling_Day"

    # 确保输出目录存在
    os.makedirs(output_dir_min, exist_ok=True)
    os.makedirs(output_dir_day, exist_ok=True)

    # 准备要处理的日期索引列表（从第6个文件开始，确保有前5天的数据）
    indices_to_process = list(range(5, len(file_dates)))
    
    # 使用多进程处理
    with multiprocessing.Pool(processes=num_processes) as pool:
        # 使用partial固定其他参数，只让索引i变化
        process_func = partial(
            process_single_date, 
            file_dates, 
            window_size=window_size, 
            daily_min=daily_min,
            output_dir_min=output_dir_min,
            output_dir_day=output_dir_day
        )
        
        # 并行处理所有日期
        results = pool.map(process_func, indices_to_process)
    
    # 处理结果
    success_count = sum(1 for _, success in results if success)
    logger.info(f"处理完成，成功处理 {success_count} 个日期，总共 {len(indices_to_process)} 个日期")
    
    return output_dir_day



In [18]:
def calculate_rolling_20d_avg(factor_names, factor_dir, min_window=5):
    
    if not isinstance(factor_names, list):
        factor_names = [factor_names]
    
    parquet_pattern = os.path.join(factor_dir, "*.parquet")
    parquet_files = glob.glob(parquet_pattern)
    print(f"在目录 {factor_dir} 中找到 {len(parquet_files)} 个Parquet文件")
    

    conn = duckdb.connect(database=':memory:')
    all_factor_data = conn.execute(f"""
        SELECT * FROM read_parquet('{parquet_pattern}')
    """).fetchdf()
    conn.close()
    

    missing_factors = [f for f in factor_names if f not in all_factor_data.columns]
    if missing_factors:
        raise ValueError(f"在Parquet文件中未找到以下因子列: {', '.join(missing_factors)}")
    
    all_factor_data['date'] = pd.to_datetime(all_factor_data['date'])

    all_factor_data = all_factor_data.sort_values(['date', 'security_code'])
    

    # 获取所有个股代码
    all_securities = all_factor_data['security_code'].unique()

    result_df = all_factor_data[['date', 'security_code']].copy()

    # 为每个因子计算滚动平均
    for factor_name in factor_names:
        print(f"\n处理因子: {factor_name}")
        
        #转置成宽表
        pivot_data = all_factor_data.pivot(index='date', columns='security_code', values=factor_name)

        # 对宽表直接应用rolling
        rolling_avg = pivot_data.rolling(window=20, min_periods=min_window).mean()

        # 将结果转换回长格式
        factor_df = rolling_avg.stack().reset_index()
        factor_df.columns = ['date', 'security_code', factor_name]
        # factor_df[factor_name] = - factor_df[factor_name]

        # 计算每日因子覆盖率
        # 计算每个日期非NaN的因子值数量
        non_nan_counts = factor_df.dropna(subset=[factor_name]).groupby('date').size()

        # 计算覆盖率
        coverage = non_nan_counts / len(all_securities)
        print(f"\n{factor_name}因子覆盖率统计: 平均={coverage.mean():.2f}, 最小={coverage.min():.2f}")

        result_df = pd.merge(
            result_df, 
            factor_df,
            on=['date', 'security_code'],
            how='left'
        )
    
    return result_df

In [19]:
def main():
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )
    
    # 调用多进程函数
    rolling_df_dir = calculate_rolling_average(
        window_size=1200, 
        daily_min=400,
        start_date="20210518", 
        end_date="20241231",
        num_processes=15
    )

    factor_name ='rolling_apb'
    daily_factor = calculate_rolling_20d_avg(factor_name, rolling_df_dir)
    
    # 保存结果为CSV
    output_path = "/data/home/lexuanchen/Factors/Order/Signal/Improved_APB"
    os.makedirs(output_path, exist_ok=True)
    output_file_path = f"{output_path}/MinuteRoll_Raw_Order_APB.csv"
    daily_factor.to_csv(output_file_path, index=False)
    
    print("跨日复权分钟版APB因子计算完成")
    print(daily_factor.head())

if __name__ == "__main__":
    main()

2025-06-24 16:03:34,012 - __main__ - INFO - 找到 854 个分钟级数据文件
2025-06-24 16:03:34,434 - __main__ - INFO - 处理日期 20210525 的数据
2025-06-24 16:03:34,435 - __main__ - INFO - 处理日期 20210616 的数据
2025-06-24 16:03:34,437 - __main__ - INFO - 处理日期 20210707 的数据
2025-06-24 16:03:34,438 - __main__ - INFO - 处理日期 20210729 的数据
2025-06-24 16:03:34,440 - __main__ - INFO - 处理日期 20210910 的数据
2025-06-24 16:03:34,443 - __main__ - INFO - 处理日期 20211104 的数据
2025-06-24 16:03:34,443 - __main__ - INFO - 处理日期 20211013 的数据
2025-06-24 16:03:34,449 - __main__ - INFO - 处理日期 20211129 的数据
2025-06-24 16:03:34,464 - __main__ - INFO - 处理日期 20220117 的数据
2025-06-24 16:03:34,457 - __main__ - INFO - 处理日期 20211223 的数据
2025-06-24 16:03:34,483 - __main__ - INFO - 处理日期 20220329 的数据
2025-06-24 16:03:34,470 - __main__ - INFO - 处理日期 20220215 的数据
2025-06-24 16:03:34,439 - __main__ - INFO - 处理日期 20210820 的数据
2025-06-24 16:03:34,476 - __main__ - INFO - 处理日期 20220308 的数据
2025-06-24 16:03:34,488 - __main__ - INFO - 处理日期 20220421 的数据
The behavi

在目录 ./factors/minute_level_APB/1200_Rolling_Day 中找到 849 个Parquet文件


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))


处理因子: rolling_apb

rolling_apb因子覆盖率统计: 平均=0.90, 最小=0.75
跨日复权分钟版APB因子计算完成
        date security_code  rolling_apb
0 2021-05-25        000001          NaN
1 2021-05-25        000002          NaN
2 2021-05-25        000004          NaN
3 2021-05-25        000005          NaN
4 2021-05-25        000006          NaN
