In [5]:
import os
import xlwings as xw
import pandas as pd
import re

# Root folder that is scanned for input files (hard-coded, machine-specific path).
main_path = r'D:\JupyterLabFiles\Futures\Futures_exchange_data\INE'
# Make it the working directory: table_combine writes its CSV files with
# relative names, so they end up in this folder.
os.chdir(main_path)

# Collect the list of data files to process
def get_file_list(main_path):
    """Return the paths of every data file found below *main_path*.

    The directory tree is walked recursively with ``os.walk``. A file is
    kept when its name ends with ``.csv``, ``.xls`` or ``.xlsx`` and does
    not start with ``~$`` (presumably Office lock/temporary files).

    Args:
        main_path: Directory to search.

    Returns:
        A list of paths, each built by joining the containing directory
        with the file name, in the order ``os.walk`` yields them.
    """
    wanted_suffixes = ('.csv', '.xls', '.xlsx')
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(main_path)
        for name in names
        if name.endswith(wanted_suffixes) and not name.startswith('~$')
    ]

def process_file(file):
    """Load one workbook and return it as a cleaned, renamed DataFrame.

    Steps, in order:
      1. Read the workbook with ``pd.read_excel`` using the third row
         (``header=2``) as the column header.
      2. Forward-fill the contract column.
      3. Drop rows whose contract text contains '注'.
      4. Drop rows that have no date.
      5. Keep only the mapped columns and rename them.
      6. Add a ``variety`` column holding the first run of ASCII letters
         found in ``symbol``.

    Args:
        file: Path of the workbook to read.

    Returns:
        The processed DataFrame, or ``None`` when any step raises (the
        error is printed, not re-raised).
    """
    # Source column names -> AKShare futures column names.
    column_names = {
        "合约": "symbol",
        "日期": "date",
        "开盘价": "open",
        "最高价": "high",
        "最低价": "low",
        "收盘价": "close",
        "成交量": "volume",
        "持仓量": "open_interest",
        "成交金额": "turnover",
        "结算价": "settle",
        "前结算": "pre_settle",
    }

    try:
        sheet = pd.read_excel(file, header=2)
        sheet['合约'] = sheet['合约'].ffill()

        # Index labels of the rows whose contract cell contains '注'.
        is_note = sheet['合约'].str.contains('注', na=False)
        sheet = sheet.drop(sheet.index[is_note])

        sheet = sheet.dropna(subset=['日期'])

        # Restrict to the mapped columns, then switch to the new names.
        sheet = sheet[list(column_names)].rename(columns=column_names)

        # Derive the variety column from the symbol.
        sheet['variety'] = sheet['symbol'].str.extract(r'([a-zA-Z]+)')
        print(f'文件{file}处理完成!')
        return sheet
    except Exception as e:
        print(f'文件{file}出错{e}')
        return None

def table_combine(tables):
    """Merge the per-file tables and export futures and options to CSV.

    The tables are concatenated, rows whose ``date`` is not exactly eight
    digits are discarded, and the remainder is split by the shape of the
    ``symbol`` value:

    * futures: lowercase letters followed by digits, e.g. ``al0501``
    * options: the same followed by ``C`` or ``P`` and digits, e.g.
      ``al0501C45000`` / ``al0501P45000``

    The results are written to ``INE_futures.csv`` and ``INE_options.csv``
    in the current working directory, GBK-encoded and without the index.
    Rows whose symbol matches neither pattern, or is missing, are written
    to neither file.

    Args:
        tables: Iterable of DataFrames with at least ``date`` and
            ``symbol`` columns. ``None`` entries are skipped.

    Raises:
        ValueError: Propagated from ``pd.concat`` when there is no
            DataFrame left to concatenate.
    """
    # Skip the None placeholders explicitly instead of relying on concat.
    frames = [table for table in tables if table is not None]
    combined = pd.concat(frames, ignore_index=True)

    # One vectorised mask replaces the row-by-row scan and the per-row
    # drop, each of which copied the whole frame.
    valid_date = combined['date'].astype(str).str.match(r'^\d{8}$', na=False)
    combined = combined[valid_date]

    # na=False: a missing symbol counts as "no match" rather than yielding
    # NaN, which boolean indexing rejects.
    symbols = combined['symbol']
    futures = combined[symbols.str.match(r'^([a-z]+)\d+$', na=False)]
    options = combined[symbols.str.match(r'^([a-z]+)\d+[CP]\d+$', na=False)]

    options.to_csv('INE_options.csv', index=False, encoding='gbk')
    print('期权数据 INE_options.csv 已保存!')
    futures.to_csv('INE_futures.csv', index=False, encoding='gbk')
    print('期货数据 INE_futures.csv 已保存!')


# Run the pipeline: find the input files, parse each one, then merge and export.
files = get_file_list(main_path)
tables = list(map(process_file, files))
table_combine(tables)

文件D:\JupyterLabFiles\Futures\Futures_exchange_data\INE\datas\ine所内合约行情报表2018.1月-12月.xls处理完成!
文件D:\JupyterLabFiles\Futures\Futures_exchange_data\INE\datas\ine所内合约行情报表2019.1月-12月.xls处理完成!
文件D:\JupyterLabFiles\Futures\Futures_exchange_data\INE\datas\ine所内合约行情报表2020.1月-12月.xls处理完成!
文件D:\JupyterLabFiles\Futures\Futures_exchange_data\INE\datas\ine所内合约行情报表2021.1月-12月.xls处理完成!
文件D:\JupyterLabFiles\Futures\Futures_exchange_data\INE\datas\ine所内合约行情报表2022.1月-12月.xls处理完成!
文件D:\JupyterLabFiles\Futures\Futures_exchange_data\INE\datas\ine所内合约行情报表2023.1月-12月.xls处理完成!
文件D:\JupyterLabFiles\Futures\Futures_exchange_data\INE\datas\ine所内合约行情报表2024.1月-12月.xlsx处理完成!
期权数据 INE_options.csv 已保存!
期货数据 INE_futures.csv 已保存!
