In [26]:
import os
import pandas as pd

# Root directory that is walked for input files.
# NOTE(review): hard-coded absolute Windows path - adjust before running on
# another machine.
main_path = r'D:\JupyterLabFiles\Futures\Futures_exchange_data\GFE\datas'
# Switch the working directory to main_path, so any relative path used later
# (for reading or writing) resolves inside this directory.
os.chdir(main_path)

# Collect the list of data files to work on.
def get_file_list(main_path, extensions=('.csv', '.xls', '.xlsx')):
    """Recursively list the data files found under ``main_path``.

    Parameters
    ----------
    main_path : str
        Directory to walk; sub-directories are included.
    extensions : str or tuple of str, optional
        File-name suffix(es) to accept. Matching is case-insensitive, so
        ``DATA.CSV`` is picked up as well as ``data.csv``. Defaults to the
        CSV/Excel suffixes.

    Returns
    -------
    list of str
        ``os.path.join(root, name)`` for every matching file, in a
        deterministic order: directories are visited top-down and both
        directory and file names are processed in sorted order. An empty
        list is returned when nothing matches or the directory is missing.
    """
    # Accept a single suffix given as a plain string.
    if isinstance(extensions, str):
        extensions = (extensions,)
    wanted = tuple(ext.lower() for ext in extensions)

    files_path = []
    for root, dirs, files in os.walk(main_path):
        # Sorting in place makes os.walk descend into sub-directories in a
        # reproducible order regardless of the filesystem's enumeration order.
        dirs.sort()
        for file in sorted(files):
            # Names starting with '~$' are skipped (typically the temporary
            # lock files Office creates next to an open workbook).
            if file.startswith('~$'):
                continue
            if file.lower().endswith(wanted):
                files_path.append(os.path.join(root, file))
    return files_path

# Per-file processing routine.
def process_file(file):
    """Load one CSV file and normalise its columns.

    The file is read with ``header=1``, i.e. its second line supplies the
    column names. A file whose header contains a ``DELTA`` column is treated
    as options data; every other file is treated as futures data. In both
    cases only the columns listed in the corresponding mapping are kept and
    they are renamed to their English names. Futures frames additionally get
    a ``variety`` column holding the first run of ASCII letters found in
    ``symbol`` (e.g. ``si2308`` -> ``si``).

    Parameters
    ----------
    file : str
        Path of the CSV file to read.

    Returns
    -------
    tuple or None
        ``(DataFrame, 'option')`` or ``(DataFrame, 'future')`` on success.
        ``None`` when anything goes wrong (unreadable file, missing
        columns, ...); the error is reported with ``print`` rather than
        raised, so callers must check for ``None`` before unpacking.
    """
    # Futures column-name mapping (source header -> AKSHARE-style name).
    futures_column_mapping = {
        "合约代码": "symbol",
        "交易日期": "date",
        "开盘价": "open",
        "最高价": "high",
        "最低价": "low",
        "收盘价": "close",
        "成交量": "volume",
        "持仓量": "open_interest",
        "成交额": "turnover",
        "结算价": "settle",
        "前结算价": "pre_settle",
    }
    # Column names kept for options data.
    options_column_mapping = {
        "品种名称": "future_name",
        "合约代码": "option_name",
        "交易日期": "trade_date",
        "开盘价": "open",
        "最高价": "high",
        "最低价": "low",
        "收盘价": "close",
        "前结算价": "pre_settle",
        "结算价": "settle",
        "DELTA": "delta",
        "成交量": "volume",
        "持仓量": "open_interest",
        # Spelled 'turnover' so it agrees with the futures mapping above.
        "成交额": "turnover",
        "行权量": "exercise_vol",
    }
    try:
        df = pd.read_csv(file, header=1)

        # The presence of a DELTA column is what distinguishes options files.
        if 'DELTA' in df.columns:
            # Select in mapping order, then rename; a missing column raises
            # KeyError, which is handled below.
            df = df[list(options_column_mapping)].rename(columns=options_column_mapping)
            print(f'文件{file}处理完成！')
            return df, 'option'

        df = df[list(futures_column_mapping)].rename(columns=futures_column_mapping)
        # expand=False yields a Series, which is what a single new column needs.
        df['variety'] = df['symbol'].str.extract(r'([a-zA-Z]+)', expand=False)
        print(f'文件{file}处理完成!')
        return df, 'future'
    except Exception as e:
        # Deliberate best-effort: report the problem and let the caller skip
        # this file instead of aborting the whole batch.
        print(f'文件{file}出错{e}')
        return None

# The combined outputs are written with relative names, i.e. into the current
# working directory (main_path after the os.chdir above). That is the same
# tree get_file_list walks, so on a re-run they must be excluded from the
# inputs or they would be parsed as if they were raw exchange files.
output_names = {'GFE_options.csv', 'GFE_futures.csv'}
files = [
    file for file in get_file_list(main_path)
    if os.path.basename(file) not in output_names
]

# One entry per input file, aligned with `files`. process_file returns None
# for a file it could not handle, so None entries are skipped below instead of
# being subscripted (which would raise TypeError).
tables = [process_file(file) for file in files]
futures = [i[0] for i in tables if i is not None and i[1] == 'future']
options = [i[0] for i in tables if i is not None and i[1] == 'option']

# pd.concat raises ValueError on an empty list, so each category is only
# combined and saved when at least one file of that kind was processed.
if options:
    option_combined = pd.concat(options, ignore_index=True)
    option_combined.to_csv('GFE_options.csv', index=False, encoding='gbk')
    print('期权数据 GFE_options.csv 已保存!')
else:
    print('未找到期权数据, 跳过 GFE_options.csv')

if futures:
    future_combined = pd.concat(futures, ignore_index=True)
    future_combined.to_csv('GFE_futures.csv', index=False, encoding='gbk')
    print('期货数据 GFE_futures.csv 已保存')
else:
    print('未找到期货数据, 跳过 GFE_futures.csv')


文件D:\JupyterLabFiles\Futures\Futures_exchange_data\GFE\datas\ALLFUTURES2022.csv处理完成!
文件D:\JupyterLabFiles\Futures\Futures_exchange_data\GFE\datas\ALLFUTURES2023.csv处理完成!
文件D:\JupyterLabFiles\Futures\Futures_exchange_data\GFE\datas\ALLFUTURES2024.csv处理完成!
文件D:\JupyterLabFiles\Futures\Futures_exchange_data\GFE\datas\ALLOPTIONS2022.csv处理完成！
文件D:\JupyterLabFiles\Futures\Futures_exchange_data\GFE\datas\ALLOPTIONS2023.csv处理完成！
文件D:\JupyterLabFiles\Futures\Futures_exchange_data\GFE\datas\ALLOPTIONS2024.csv处理完成！
期权数据 GFE_options.csv 已保存!
期货数据 GFE_futures.csv 已保存


In [24]:
# Inspect element 0 of the first process_file result, i.e. the DataFrame of
# the first processed file (each successful result is a (DataFrame, kind) tuple).
tables[0][0]

Unnamed: 0,symbol,date,open,high,low,close,volume,open_interest,turnover,settle,pre_settle,variety
0,si2308,20221222,19100.0,19720.0,18160.0,18310.0,28742,4710,2664569100,18540.0,18500.0,si
1,si2309,20221222,18500.0,19165.0,18100.0,18215.0,1484,574,137884550,18580.0,18500.0,si
2,si2310,20221222,18500.0,19220.0,18070.0,18150.0,613,222,56764000,18520.0,18500.0,si
3,si2311,20221222,18500.0,19120.0,17915.0,18000.0,1234,616,112559425,18240.0,18500.0,si
4,si2312,20221222,18800.0,20660.0,18290.0,18355.0,1129,507,106021025,18780.0,18500.0,si
5,si2308,20221223,18180.0,18220.0,17060.0,17060.0,30354,7629,2645073250,17425.0,18540.0,si
6,si2309,20221223,18295.0,18295.0,17095.0,17095.0,830,862,71863700,17315.0,18580.0,si
7,si2310,20221223,17790.0,17900.0,17040.0,17040.0,167,246,14449700,17305.0,18520.0,si
8,si2311,20221223,17615.0,17685.0,16790.0,16805.0,332,714,28478500,17155.0,18240.0,si
9,si2312,20221223,17930.0,17930.0,17280.0,17280.0,193,503,16875475,17485.0,18780.0,si


In [19]:
# Print the first rows of every processed file.
for table in tables:
    # process_file returns None for a file it could not handle; nothing to show.
    if table is None:
        continue
    # Successful results are (DataFrame, kind) tuples; a bare DataFrame is
    # still accepted in case `tables` was built by an older version of the cell.
    frame = table[0] if isinstance(table, tuple) else table
    print(frame.head())

       交易日期 品种名称  交割月份    合约代码     前结算价      开盘价      最高价      最低价      收盘价  \
0  20221222  工业硅  2308  si2308  18500.0  19100.0  19720.0  18160.0  18310.0   
1  20221222  工业硅  2309  si2309  18500.0  18500.0  19165.0  18100.0  18215.0   
2  20221222  工业硅  2310  si2310  18500.0  18500.0  19220.0  18070.0  18150.0   
3  20221222  工业硅  2311  si2311  18500.0  18500.0  19120.0  17915.0  18000.0   
4  20221222  工业硅  2312  si2312  18500.0  18800.0  20660.0  18290.0  18355.0   

       结算价     涨跌    涨跌1    成交量   持仓量  持仓量变化         成交额  
0  18540.0 -190.0   40.0  28742  4710   4710  2664569100  
1  18580.0 -285.0   80.0   1484   574    574   137884550  
2  18520.0 -350.0   20.0    613   222    222    56764000  
3  18240.0 -500.0 -260.0   1234   616    616   112559425  
4  18780.0 -145.0  280.0   1129   507    507   106021025  
       交易日期 品种名称  交割月份    合约代码     前结算价      开盘价      最高价      最低价      收盘价  \
0  20230103  工业硅  2308  si2308  18070.0  17870.0  18040.0  17600.0  17770.0   
1  20230103  