In [1]:
import pandas as pd
import os
import chardet
import re
import logging

# Exchange header -> output column name for futures files (the original author
# notes these follow AKShare's column names).
_FUTURES_COLUMN_MAPPING = {
    "合约代码": "symbol",
    "交易日期": "date",
    "今开盘": "open",
    "最高价": "high",
    "最低价": "low",
    "今收盘": "close",
    "成交量(手)": "volume",
    "持仓量": "open_interest",
    "成交额(万元)": "turnover",
    "今结算": "settle",
    "昨结算": "pre_settle",
}

# Exchange header -> output column name for option files.
# NOTE(review): "close" is keyed on "今收盘" (the header the futures mapping
# uses) and the turnover column is spelled "turnover"; the previous
# "今盘价"/"trnover" spellings looked like typos -- confirm against real files.
_OPTIONS_COLUMN_MAPPING = {
    "品种代码": "option_name",
    "交易日期": "trade_date",
    "今开盘": "open",
    "最高价": "high",
    "最低价": "low",
    "今收盘": "close",
    "昨结算": "pre_settle",
    "今结算": "settle",
    "DELTA": "delta",
    "成交量(手)": "volume",
    "持仓量": "open_interest",
    "成交额(万元)": "turnover",
    "行权量": "exercise_vol",
}


def process_txt_file(file, column_mapping=None):
    """Parse one '|'-delimited TXT file into a DataFrame with renamed columns.

    The first line of the file is skipped, the next non-blank line is used as
    the header, and every remaining non-blank line becomes a row. Only the
    columns named in the mapping are kept (in mapping order) and renamed.
    A ``variety`` column (first run of upper-case letters of the contract
    code) is added and the date column is reformatted to ``YYYYMMDD``.

    Args:
        file: Path of the TXT file to read.
        column_mapping: Optional ``{header name: output name}`` dict. When
            omitted, the built-in option mapping is used if the header has all
            of its source columns, otherwise the built-in futures mapping.

    Returns:
        pandas.DataFrame whose cells are the stripped strings from the file
        (numeric fields are NOT converted and keep their thousands separators).

    Raises:
        ValueError: If there is no header row after the first line.
        KeyError: If the header lacks columns required by the mapping, or the
            renamed frame has no contract-code / date column.
    """
    # Detect the file encoding from the raw bytes.
    with open(file, 'rb') as f:
        detected = chardet.detect(f.read())
    encoding = detected['encoding']

    # Read the file line by line with the detected encoding.
    with open(file, 'r', encoding=encoding) as f:
        lines = f.readlines()

    # Skip the first line, drop blank lines (they would otherwise become junk
    # rows), then split on '|' and strip the padding around each cell.
    rows = [
        [cell.strip() for cell in line.split('|')]
        for line in lines[1:]
        if line.strip()
    ]
    if not rows:
        raise ValueError(f"{file}: no header row found after the first line")

    header, records = rows[0], rows[1:]
    # Fit every row to the header width instead of a fixed 14 cells, so that
    # columns beyond the 14th (e.g. DELTA) are not cut off and ragged rows
    # cannot break DataFrame construction.
    width = len(header)
    records = [(row + [None] * width)[:width] for row in records]
    df = pd.DataFrame(records, columns=header)

    # Pick the mapping: the caller's, or the first built-in one whose source
    # columns are all present in this file's header.
    if column_mapping is None:
        candidates = (_OPTIONS_COLUMN_MAPPING, _FUTURES_COLUMN_MAPPING)
    else:
        candidates = (dict(column_mapping),)
    present = set(header)
    mapping = next((m for m in candidates if present.issuperset(m)), None)
    if mapping is None:
        missing = [sorted(set(m) - present) for m in candidates]
        raise KeyError(f"{file}: required columns missing from header: {missing}")

    # Keep only the mapped columns, then rename them.
    df = df[list(mapping)].rename(columns=mapping)

    # The contract code / date live under different names depending on the
    # mapping; use whichever one exists instead of hard-coding symbol/date.
    code_col = next((c for c in ('symbol', 'option_name') if c in df.columns), None)
    date_col = next((c for c in ('date', 'trade_date') if c in df.columns), None)
    if code_col is None or date_col is None:
        raise KeyError(
            f"{file}: mapping must produce a 'symbol'/'option_name' column "
            f"and a 'date'/'trade_date' column"
        )

    # Create the variety column from the leading upper-case letters of the code.
    df['variety'] = df[code_col].str.extract(r'([A-Z]+)', expand=False)
    # Convert YYYY-MM-DD dates to YYYYMMDD.
    df[date_col] = pd.to_datetime(df[date_col]).dt.strftime('%Y%m%d')
    return df

def combine_txt_files(folder_path):
    """Process every ``.txt`` file in a folder and concatenate the results.

    Each file whose name ends with ``.txt`` is passed to ``process_txt_file``
    and the returned DataFrames are concatenated with a fresh 0..n-1 index.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        The combined pandas.DataFrame, or ``None`` when the folder holds no
        ``.txt`` files or ``process_txt_file`` returns ``None`` for any file
        (merging stops at the first such file).
    """
    # os.listdir yields names in arbitrary order; sort them so the row order of
    # the combined frame is reproducible from run to run.
    file_list = sorted(f for f in os.listdir(folder_path) if f.endswith('.txt'))

    all_df = []

    for filename in file_list:
        filepath = os.path.join(folder_path, filename)
        df = process_txt_file(filepath)

        if df is None:
            logging.error(f"处理文件 {filepath} 失败，停止合并。")
            return None

        all_df.append(df)

    # Nothing was read successfully: report it and return None.
    if not all_df:
        logging.warning("没有成功读取任何文件。")
        return None

    combined_df = pd.concat(all_df, ignore_index=True)
    logging.info(f"文件夹 {folder_path} 处理完成，共合并 {len(combined_df)} 行数据。")
    return combined_df



In [4]:
# Example usage
folder_path = r"D:\JupyterLabFiles\Futures\Futures_exchange_data\CZCE\Options"  # replace with the actual folder path
output_path = "combined_Options.csv"

combined_df = combine_txt_files(folder_path)

if combined_df is not None:
    print(combined_df.head())
    combined_df.to_csv(output_path, index=False, encoding='gbk')
    # Log the file that was actually written (the old message named a different file).
    logging.info(f"合并后的数据已保存到 {output_path}。")
else:
    logging.error("处理过程中出现错误。")

  symbol      date    open    high     low   close   volume open_interest  \
0  CF001  20100104  16,100  16,370  16,100  16,250    1,244        10,748   
1  CF003  20100104  16,305  16,540  16,295  16,370    1,734         9,614   
2  CF005  20100104  16,600  16,845  16,530  16,610  498,986       265,668   
3  CF007  20100104  16,745  17,040  16,725  16,880    2,534         5,982   
4  CF009  20100104  17,290  17,635  17,210  17,465  148,778        95,246   

       turnover  settle pre_settle variety  
0     10,127.65  16,305     16,090      CF  
1     14,264.61  16,455     16,260      CF  
2  4,170,386.05  16,715     16,525      CF  
3     21,441.21  16,925     16,685      CF  
4  1,301,998.35  17,505     17,135      CF  
