# 合并数据_汇总：深调套利收入
读取 `data_out` 中的交易量价数据，并按照提供的公式计算 `深调套利收入`。

In [1]:
from pathlib import Path
import pandas as pd

OUTPUT_PATH = Path('../data_output/合并数据_汇总.xlsx')
SUMMARY_KEY = '合并数据_汇总'
SOURCE_KEY = '交易量价数据信息'

if not OUTPUT_PATH.exists():
    raise FileNotFoundError(f'找不到合并后的输出文件: {OUTPUT_PATH}')

data_out = pd.read_excel(OUTPUT_PATH, sheet_name=None)
sheet_order = list(data_out.keys())

if SUMMARY_KEY not in data_out:
    if SOURCE_KEY not in data_out:
        raise KeyError('在工作簿中找不到 `交易量价数据信息` 表，无法创建合并数据。')
    data_out[SUMMARY_KEY] = data_out[SOURCE_KEY].copy()
    sheet_order.append(SUMMARY_KEY)

print(f'已读取 {len(data_out)} 张表，目标: {SUMMARY_KEY}')


已读取 4 张表，目标: 合并数据_汇总


In [2]:
summary_df = data_out[SUMMARY_KEY].copy()
info_df = data_out['基础信息'].copy()

required_columns = ['日前中标出力', '省内中长期上网电量', '日前出清节点价格', '省内中长期均价', '日期', '公司名称', '日内实际出力']
missing_columns = [col for col in required_columns if col not in summary_df.columns]
if missing_columns:
    raise ValueError(f'{SUMMARY_KEY} 缺少以下列: {missing_columns}')

if '机组容量' not in info_df.columns or '机组名称' not in info_df.columns:
    raise ValueError(f'基础信息 缺少必需列: 机组容量 或 机组名称')

# 日期筛选
start_date = pd.to_datetime('2026-01-26')  # 设置起始日期
end_date = pd.to_datetime('2026-01-26')    # 设置结束日期

summary_df['日期'] = pd.to_datetime(summary_df['日期'])
summary_df = summary_df[(summary_df['日期'] >= start_date) & (summary_df['日期'] <= end_date)]

# 新增筛选：日前出清节点价格 [0, 200]
summary_df = summary_df[(summary_df['日前出清节点价格'] >= 0) & (summary_df['日前出清节点价格'] <= 200)]

print(f'筛选后剩余数据: {len(summary_df)} 行')

def to_numeric(series):
    return pd.to_numeric(series, errors='coerce')

bid_power = to_numeric(summary_df['日前中标出力'])
contract_power = to_numeric(summary_df['省内中长期上网电量'] + summary_df['省间中长期上网电量'])
intra_prov_contact_power = to_numeric(summary_df['省内中长期上网电量'])
inter_prov_contact_power = to_numeric(summary_df['省间中长期上网电量'])
spot_price = to_numeric(summary_df['日前出清节点价格'])
intra_prov_contact_avg_price = to_numeric(summary_df['省内中长期均价'])
inter_prov_contact_avg_price = to_numeric(summary_df['省间中长期均价'])
contract_price = (intra_prov_contact_power * intra_prov_contact_avg_price + inter_prov_contact_power * inter_prov_contact_avg_price) / contract_power

condition = (bid_power < contract_power * 4) & (spot_price < contract_price)
spread = (contract_power * 4 - bid_power) * (contract_price - spot_price) / 4
summary_df['深调套利收入'] = spread.where(condition, 0).fillna(0)
summary_df['contract_power'] = contract_power

# 合并装机容量
capacity_mapping = info_df.set_index('机组名称')['机组容量'].to_dict()
summary_df['装机容量'] = summary_df['机组名称'].map(capacity_mapping)
summary_df['装机容量'] = to_numeric(summary_df['装机容量'])

# 按公司名称汇总
result_df = summary_df.groupby('公司名称').agg(
    日前0价时长_小时_=('日前出清节点价格', lambda x: (x == 0).sum() / 4),
    现货价格_=('日前出清节点价格', 'mean'),
    深调平均负荷_=('日内实际出力', lambda x: to_numeric(x).sum() / to_numeric(summary_df.loc[x.index, '装机容量']).sum()),
    中长期平均持仓_=('contract_power', 'mean'),
    深调套利_元_=('深调套利收入', 'sum')
).reset_index()

result_df.columns = ['单位', '日前0价时长（小时）', '现货价格', '深调平均负荷', '中长期平均持仓', '深调套利（元）']
result_df

筛选后剩余数据: 638 行


Unnamed: 0,单位,日前0价时长（小时）,现货价格,深调平均负荷,中长期平均持仓,深调套利（元）
0,临汾,12.5,20.724138,0.516062,46.051172,13610.33486
1,侯马,12.5,20.724138,0.465366,40.783879,14844.91207
2,同华,12.5,20.724138,0.895067,115.187948,505339.031056
3,同承,25.0,20.724138,0.087521,6.558,992.81595
4,同达,12.5,20.724138,0.470422,58.024052,285461.107954
5,塔山,12.5,20.724138,0.664767,,0.0
6,河津,12.5,20.724138,0.340167,,0.0
7,王坪,12.5,20.724138,0.307014,,0.0
8,蒲洲,12.5,20.724138,0.428723,58.960086,193533.161988
9,阳高,12.5,20.724138,0.425405,,0.0
