# 安装依赖

In [1]:
!pip install numpy pandas scikit-learn matplotlib seaborn holidays datetime mesa lightgbm xgboost

Looking in indexes: https://mirrors.cloud.aliyuncs.com/pypi/simple
Collecting holidays
  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/c1/e4/63aa666d39dddfeaaaff1f58416d2a9c756c9b88037eae1b0d954c06d5bd/holidays-0.54-py3-none-any.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[?25hCollecting datetime
  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/f3/78/8e382b8cb4346119e2e04270b6eb4a01c5ee70b47a8a0244ecdb157204f7/DateTime-5.5-py3-none-any.whl (52 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.6/52.6 kB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting mesa
  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/88/95/0dbcd57efdc1dce794570de79da6d27ea8d22ea5d703c2a886ff045fc65b/mesa-2.3.2-py3-none-any.whl (65 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.7/65.7 kB[0m [31m22.3 MB/s[0m eta 

# 导入相关包

In [2]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from mesa import Agent, Model
from mesa.time import RandomActivation
from mesa.datacollection import DataCollector
from pathlib import Path
import seaborn as sns
import os
from datetime import timedelta
from tqdm import tqdm
import seaborn as sns
import matplotlib.pylab as plt
from pathlib import Path
import warnings
from sklearn.linear_model import LinearRegression
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import StratifiedKFold, KFold, GroupKFold
from xgboost import XGBRegressor

# Silence every warning for the rest of the session.
warnings.filterwarnings(action='ignore')

# Apply the ggplot theme first, then override the font settings on top of it.
plt.style.use('ggplot')
plt.rcParams.update({
    # Preferred sans-serif fonts (presumably chosen so Chinese labels render).
    'font.sans-serif': ["WenQuanYi Micro Hei", 'SimHei'],
    # Draw the minus sign as an ASCII hyphen instead of the Unicode minus glyph.
    'axes.unicode_minus': False,
})

# 读取数据

In [3]:

# All input files are expected in a "data" directory next to the notebook.
base_path = Path("data")
price_csv = base_path / "electricity price.csv"
unit_csv = base_path / "unit.csv"

# Market data (demand and clearing price per time slot).
electricity_price = pd.read_csv(price_csv)
# Market participants (one row per generating unit).
unit = pd.read_csv(unit_csv)

# Build the sample submission file:
# 1. rows whose clearing price is missing are the prediction targets;
# 2. the "demand" column is dropped to match the submission format.
target_missing = electricity_price["clearing price (CNY/MWh)"].isna()
sample_submit = electricity_price.loc[target_missing].drop(columns="demand")
sample_submit.to_csv(base_path / "sample_submit.csv", index=False)

# 特征工程

特征工程是数据预处理过程的一部分，涉及从原始数据中提取和创建新特征，以提高机器学习模型的性能。它包括清理数据、处理缺失值、转换变量类型、标准化或归一化数据，以及创建新的衍生特征。特征工程的目标是通过选择和构建合适的特征，使模型能够更好地理解数据和预测目标变量。

In [4]:
# 将day和time列合并成timestamp列，便于提取时间戳特征
# Combine "day" and "time" into one "timestamp" column so datetime features can
# be extracted. "24:00:00" is not a parseable clock time, so it is first
# rewritten to "00:00" (which lands on the same calendar day).
clock_text = electricity_price["time"].str.replace("24:00:00", "00:00")
electricity_price["timestamp"] = pd.to_datetime(electricity_price["day"] + " " + clock_text)

# Every timestamp that now reads 00:00:00 is treated as an end-of-day slot and
# moved forward by one day, i.e. to midnight of the following day.
midnight = pd.Timestamp('00:00:00').time()
mask = electricity_price['timestamp'].dt.time == midnight
electricity_price.loc[mask, 'timestamp'] += pd.Timedelta(days=1)

# Fix the column order; the original "day" column is dropped, "time" is kept.
kept_columns = ["timestamp", "demand", "clearing price (CNY/MWh)", "time"]
electricity_price = electricity_price[kept_columns]

In [5]:
# 处理缺失值
# Fill missing demand values with the column mean.
imputer = SimpleImputer(strategy='mean')
# Double brackets give the single-column 2-D input that the imputer expects.
demand_matrix = electricity_price[["demand"]].to_numpy()
electricity_price["demand"] = imputer.fit_transform(demand_matrix)

In [6]:
# 提取时间特征
# Derive calendar features from the timestamp.
timestamps = electricity_price["timestamp"]

# Each name is both the new column name and the `.dt` accessor attribute.
for part in ("hour", "day", "month", "year", "weekday", "quarter"):
    electricity_price[part] = getattr(timestamps.dt, part)

# Flag for every month except June-August.
electricity_price["is_windy_season"] = timestamps.dt.month.isin([1, 2, 3, 4, 5, 9, 10, 11, 12])
# Flag for hours 10 through 15.
electricity_price["is_valley"] = timestamps.dt.hour.isin([10, 11, 12, 13, 14, 15])

## 节假日特征

In [7]:
# 节假日处理
def generate_holiday_dates(start_dates, duration):
    """Expand holiday start dates into a flat list of consecutive days.

    Parameters:
        start_dates: iterable of values accepted by ``pd.date_range(start=...)``,
            one per holiday occurrence.
        duration: number of consecutive days (including the start date) to
            generate for each occurrence.

    Returns:
        list of ``pd.Timestamp``: the days of all occurrences, concatenated in
        the order of ``start_dates``.
    """
    return [
        day
        for start_date in start_dates
        for day in pd.date_range(start=start_date, periods=duration).tolist()
    ]

# First day of each holiday period covered by the data.
spring_festival_start_dates = ["2022-01-31", "2023-01-21", "2024-02-10"]
labor_start_dates = ["2022-04-30", "2023-04-29"]

# Expand to every calendar day of the holiday (7 days / 5 days respectively).
spring_festivals = generate_holiday_dates(spring_festival_start_dates, 7)
labor = generate_holiday_dates(labor_start_dates, 5)

# The generated holiday entries are midnight timestamps, while "timestamp" has
# a time-of-day component. Comparing the raw timestamps would only ever match a
# 00:00 slot, so the comparison is done on the date part (normalized to
# midnight) to flag every slot of a holiday.
timestamp_dates = electricity_price["timestamp"].dt.normalize()
electricity_price["is_spring_festival"] = timestamp_dates.isin(spring_festivals)
electricity_price["is_labor"] = timestamp_dates.isin(labor)


In [8]:
electricity_price.head()  # Show the first 5 rows

Unnamed: 0,timestamp,demand,clearing price (CNY/MWh),time,hour,day,month,year,weekday,quarter,is_windy_season,is_valley,is_spring_festival,is_labor
0,2021-12-01 00:15:00,40334.18,350.8,0:15,0,1,12,2021,2,4,True,False,False,False
1,2021-12-01 00:30:00,40523.15,350.8,0:30,0,1,12,2021,2,4,True,False,False,False
2,2021-12-01 00:45:00,40374.74,350.8,0:45,0,1,12,2021,2,4,True,False,False,False
3,2021-12-01 01:00:00,40111.55,350.8,1:00,1,1,12,2021,2,4,True,False,False,False
4,2021-12-01 01:15:00,40067.5,348.93,1:15,1,1,12,2021,2,4,True,False,False,False


In [9]:
# Preview the first rows of the generating-unit table.
unit.head()

Unnamed: 0,unit ID,Capacity（MW）,utilization hour (h),coal consumption (g coal/KWh),power consumption rate (%)
0,1,110.0,2069.12,266.07,6.91
1,2,160.0,5509.22,292.7,6.91
2,3,160.0,3562.79,293.35,6.91
3,4,160.0,5684.12,284.88,6.91
4,5,220.0,2231.35,323.08,8.54


In [10]:
# 独热编码
# One-hot encode the calendar features. The listed columns are replaced by
# indicator columns, and the first level of each is dropped.
calendar_columns = ["hour", "day", "month", "year", "weekday", "quarter"]
electricity_price = pd.get_dummies(electricity_price, columns=calendar_columns, drop_first=True)

## 电力损耗惩罚因子

In [11]:
# 预处理unit数据
# Adjust the unit data for the "power consumption rate (%)" column.
# Fraction that remains after deducting that rate (it is given in percent).
net_share = 1 - unit['power consumption rate (%)'] / 100

# Coal consumption is scaled up and capacity scaled down by the same share.
# NOTE: both columns are overwritten in place, so re-running this cell applies
# the adjustment a second time.
unit['coal consumption (g coal/KWh)'] = unit['coal consumption (g coal/KWh)'] / net_share
unit['Capacity（MW）'] = unit['Capacity（MW）'] * net_share

In [12]:
# 引入光伏数据（假设数据）
# Add photovoltaic (PV) units. These are synthetic placeholder values, not
# measured data. A seeded generator keeps the capacities identical across
# Restart-and-Run-All executions.
pv_rng = np.random.default_rng(42)
pv_unit = pd.DataFrame({
    "Capacity（MW）": pv_rng.uniform(50, 150, size=10),
    "coal consumption (g coal/KWh)": np.zeros(10),  # PV burns no coal
    "is_solar": [True] * 10  # marks the row as a PV unit
})

In [13]:
# 将光伏数据添加到unit数据中
# Mark every existing unit as non-solar, then append the PV units below them
# with a fresh 0..n-1 index. Columns that pv_unit lacks are NaN for PV rows.
unit = pd.concat([unit.assign(is_solar=False), pv_unit], ignore_index=True)

In [14]:
# Re-create a plain integer "hour" column: the one-hot encoding step replaced
# the original one, and the market simulation reads electricity_price["hour"].
electricity_price["hour"] = electricity_price["timestamp"].dt.hour

# 使用ABM估计市场出清价格

In [15]:
# 定义发电机组代理
class GeneratorAgent(Agent):
    """A generating unit that bids a price into the market.

    Attributes:
        capacity: capacity offered by the unit.
        coal_consumption: coal consumption figure; also used as the initial bid.
        is_solar: whether the unit is a photovoltaic unit.
        price: current bid price.
        successful_bid: flag set by the market model during clearing.
    """

    def __init__(self, unique_id, model, capacity, coal_consumption, is_solar=False):
        super().__init__(unique_id, model)
        self.capacity = capacity
        self.coal_consumption = coal_consumption
        self.is_solar = is_solar
        # The initial bid equals the coal consumption figure.
        self.price = coal_consumption
        self.successful_bid = False

    def adjust_price(self, market_clearing_price, demand, hour):
        """Update the bid after a clearing round.

        Solar units bid 0 for hours 10 to 15 (inclusive) and an infinite price
        otherwise, which keeps them out of the merit order. Other units move
        10% of the way towards the clearing price unless their
        ``successful_bid`` flag is set. ``demand`` is accepted but not used.
        """
        if self.is_solar:
            # Assumed active window for PV generation: 10:00-15:59.
            self.price = 0 if 10 <= hour <= 15 else np.inf
            return

        if self.successful_bid:
            return

        # Close 10% of the gap to the market clearing price.
        self.price += (market_clearing_price - self.price) * 0.1

    def step(self):
        """No per-step behaviour; bids are updated through adjust_price."""
        pass

In [16]:
# 定义电力市场模型
class ElectricityMarketModel(Model):
    """Merit-order electricity market made of GeneratorAgent bidders.

    Parameters:
        agents_data: DataFrame with one row per unit. It must provide the
            columns 'Capacity（MW）', 'coal consumption (g coal/KWh)' and
            'is_solar'; the row index is used as the agent's unique id.
        electricity_price: market data frame, stored on the model as-is.
    """

    def __init__(self, agents_data, electricity_price):
        # Initialise the Mesa base class before creating agents. Mesa expects
        # this so that its own bookkeeping exists when agents register
        # (presumably the source of the "Model class was not initialized"
        # warning in Mesa 2.x - confirm against the installed version).
        # It must run before `self.schedule` is assigned.
        super().__init__()
        self.schedule = RandomActivation(self)
        self.electricity_price = electricity_price

        for index, row in agents_data.iterrows():
            agent = GeneratorAgent(index, self, row['Capacity（MW）'], row['coal consumption (g coal/KWh)'], row['is_solar'])
            self.schedule.add(agent)

        self.datacollector = DataCollector(
            agent_reporters={"Capacity": "capacity", "Coal Consumption": "coal_consumption"}
        )

    def market_clear(self, demand, hour):
        """Clear the market for one time slot and return the clearing price.

        Agents are ranked by ascending bid and their capacity is accumulated
        until it covers ``demand``. The bid of the agent that closes the gap is
        the clearing price, and only that agent gets ``successful_bid = True``.
        If total capacity never reaches ``demand`` the result is 0 and no agent
        is flagged. ``hour`` is accepted but not used here.
        """
        ranked_agents = sorted(self.schedule.agents, key=lambda agent: agent.price)

        # Reset every flag first. Without this, agents ranked after the marginal
        # one would keep a stale flag from an earlier round, because the loop
        # below stops as soon as demand is covered.
        # NOTE(review): units dispatched below the clearing price stay flagged
        # as unsuccessful, as before - confirm this is the intended meaning.
        for agent in ranked_agents:
            agent.successful_bid = False

        total_capacity = 0
        clearing_price = 0
        for agent in ranked_agents:
            total_capacity += agent.capacity
            if total_capacity >= demand:
                clearing_price = agent.price
                agent.successful_bid = True
                break

        return clearing_price

    def step(self, demand, hour):
        """Run one round: clear the market, update bids, record, advance."""
        market_clearing_price = self.market_clear(demand, hour)
        for agent in self.schedule.agents:
            agent.adjust_price(market_clearing_price, demand, hour)
        self.datacollector.collect(self)
        self.schedule.step()

In [17]:
# 初始化模型
# Order units from lowest to highest coal consumption (the merit order) and
# attach the running total of capacity in that order.
sorted_unit = (
    unit
    .sort_values("coal consumption (g coal/KWh)")
    .assign(cumulative_capacity=lambda ranked: ranked['Capacity（MW）'].cumsum())
)

In [18]:
# 创建市场模型并运行
# Build the market model from the merit-ordered units.
market_model = ElectricityMarketModel(sorted_unit, electricity_price)

# Drive the simulation with the first 96 rows (one day, if the data is at the
# 15-minute resolution shown in the preview above).
hour_series = electricity_price["hour"]
demand_series = electricity_price["demand"]
for i in range(96):
    hour = hour_series.iloc[i]
    demand = demand_series.iloc[i]
    market_model.step(demand, hour)

In [19]:
# 提取模拟数据
# Pull the per-agent records collected during the simulation.
simulation_data = market_model.datacollector.get_agent_vars_dataframe()

# The running capacity total does not depend on demand, so it is computed once
# here rather than once per demand row.
# NOTE(review): the total runs over every collected (step, agent) record, not
# over a single step's merit order - confirm that this is intended.
cumulative_capacity = simulation_data['Capacity'].cumsum().to_numpy()
coal_consumption = simulation_data["Coal Consumption"].to_numpy()

# For each demand value, take the coal consumption of the first record whose
# cumulative capacity covers it. As before, an IndexError is raised when no
# record does. `prices` is not read again in the visible part of the notebook.
prices = []
for demand in electricity_price["demand"]:
    covering = np.flatnonzero(cumulative_capacity >= demand)
    price = coal_consumption[covering[0]]
    prices.append(price)


In [20]:
# Number of leading rows used for training; the remaining rows form the test
# set (presumably the rows with a known clearing price - TODO confirm).
train_length = 55_392
# Independent deep copy, so feature engineering leaves electricity_price intact.
train_data = electricity_price.copy()

## 构造基于demand的窗口特征

In [21]:
def cal_range(x):
    """Return the range of a series: its maximum minus its minimum.

    Parameters:
        x (pd.Series): input values.

    Returns:
        The difference between the largest and the smallest value.

    Example:
        For pd.Series([1, 2, 3, 4, 5]) the result is 4.
    """
    lowest, highest = x.min(), x.max()
    return highest - lowest


def increase_num(x):
    """Count how many times a value is larger than its predecessor.

    Parameters:
        x (pd.Series): input values.

    Returns:
        int: number of strictly positive first differences.

    Example:
        For pd.Series([1, 2, 3, 2, 4]) the result is 3.
    """
    steps = x.diff()
    return steps.gt(0).sum()


def decrease_num(x):
    """Count how many times a value is smaller than its predecessor.

    Parameters:
        x (pd.Series): input values.

    Returns:
        int: number of strictly negative first differences.

    Example:
        For pd.Series([1, 2, 1, 3, 2]) the result is 2.
    """
    steps = x.diff()
    return steps.lt(0).sum()


def increase_mean(x):
    """Return the mean size of the upward steps in a series.

    Parameters:
        x (pd.Series): input values.

    Returns:
        float: mean of the strictly positive first differences; NaN when the
        series has no upward step.

    Example:
        For pd.Series([1, 2, 3, 2, 4]) the first differences are
        NaN, 1, 1, -1, 2; the upward steps are 1, 1, 2, so the result is
        about 1.33.
    """
    steps = x.diff()
    rises = steps.loc[steps > 0]
    return rises.mean()


def decrease_mean(x):
    """Return the mean size (as a positive number) of the downward steps.

    Parameters:
        x (pd.Series): input values.

    Returns:
        float: mean absolute value of the strictly negative first differences;
        NaN when the series has no downward step.

    Example:
        For pd.Series([4, 3, 5, 2, 6]) the downward steps are -1 and -3, so
        the result is 2.0.
    """
    steps = x.diff()
    falls = steps.loc[steps < 0]
    return falls.abs().mean()


def increase_std(x):
    """Return the sample standard deviation of the upward steps in a series.

    Parameters:
        x (pd.Series): input values.

    Returns:
        float: standard deviation (pandas default, ddof=1) of the strictly
        positive first differences; NaN with fewer than two upward steps.

    Example:
        For pd.Series([1, 2, 3, 2, 4]) the upward steps are 1, 1, 2 and the
        result is about 0.5774.
    """
    steps = x.diff()
    rises = steps.loc[steps > 0]
    return rises.std()


def decrease_std(x):
    """Return the sample standard deviation of the downward steps in a series.

    Parameters:
        x (pd.Series): input values.

    Returns:
        float: standard deviation (pandas default, ddof=1) of the strictly
        negative first differences; NaN with fewer than two downward steps.

    Example:
        For pd.Series([4, 3, 5, 2, 6]) the downward steps are -1 and -3 and
        the result is about 1.4142.
    """
    steps = x.diff()
    falls = steps.loc[steps < 0]
    return falls.std()


In [22]:
from tqdm import tqdm  # progress bar

# Rolling window lengths, in rows.
window_sizes = [4, 12, 24]

# Aggregations applied to every window: pandas built-ins are given by name,
# custom statistics as callables.
functions = ["mean", "std", "min", "max", cal_range, increase_num,
             decrease_num, increase_mean, decrease_mean, increase_std, decrease_std]

with tqdm(window_sizes) as pbar:
    for window_size in pbar:
        # closed="left" leaves the current row out of its own window, so each
        # feature only uses earlier demand values. At least half a window of
        # observations is required before a value is produced.
        demand_windows = train_data["demand"].rolling(
            window=window_size,
            min_periods=window_size // 2,
            closed="left",
        )

        for func in functions:
            # Built-ins are already strings; callables contribute their name.
            func_name = func if isinstance(func, str) else func.__name__

            # Column name pattern: demand_rolling_{window_size}_{func_name}
            column_name = f"demand_rolling_{window_size}_{func_name}"
            train_data[column_name] = demand_windows.agg(func)

            pbar.set_postfix({"window_size": window_size, "func": func_name})


100%|██████████| 3/3 [04:45<00:00, 95.08s/it, window_size=24, func=decrease_std] 


# 创建训练集和测试集

In [23]:
# 添加新的特征列：demand_shift_1，表示将 demand 列中的值向后移动一位
# shift(1) 的结果是当前行的值等于前一行的值，第一行的值为 NaN
# One-step lag features of demand. The first row of each is NaN because it has
# no predecessor.
demand_series = train_data["demand"]
lag_features = {
    # previous row's demand
    "demand_shift_1": demand_series.shift(1),
    # current demand minus previous demand
    "demand_diff_1": demand_series.diff(1),
    # relative change: (current - previous) / previous
    "demand_pct_1": demand_series.pct_change(1),
}
for feature_name, feature_values in lag_features.items():
    train_data[feature_name] = feature_values


In [24]:
# 从 train_data 中创建训练集和测试集特征数据 (X) 和目标数据 (y)

# 创建训练集特征数据 X_train
# 1. 从 train_data 中选择前 train_length 行，去除 "price" 列
# 2. 使用 bfill 方法向后填充缺失值
# 3. 使用 ffill 方法向前填充缺失值
# Split train_data into feature matrices (X) and the training target (y).
target_column = "clearing price (CNY/MWh)"
# Columns that are not model inputs: raw time fields and the integer "hour"
# (its one-hot columns stay in the feature set).
non_feature_columns = ['timestamp', 'time', 'hour']

train_rows = train_data.iloc[:train_length]
test_rows = train_data.iloc[train_length:]


def _as_feature_matrix(rows):
    """Drop the target, fill gaps (backward, then forward), drop non-feature
    columns and cast everything, booleans included, to float."""
    return (
        rows
        .drop(columns=[target_column])
        .bfill()
        .ffill()
        .drop(columns=non_feature_columns)
        .astype(float)
    )


X_train = _as_feature_matrix(train_rows)
X_test = _as_feature_matrix(test_rows)

# Kept as a single-column DataFrame (double brackets).
y_train = train_rows[[target_column]]

# LGBMRegressor 模型进行时序预测

In [25]:
# 创建 LGBMRegressor 模型对象，设置参数
# Gradient-boosted trees on the full feature set.
lgb_params = {
    "num_leaves": 31,      # 2**5 - 1
    "n_estimators": 300,
    "verbose": -1,
}
lgb_model = LGBMRegressor(**lgb_params)

# Fit on the training features and target, then predict the test rows.
lgb_model.fit(X_train, y_train)
lgb_pred = lgb_model.predict(X_test)


# 利用线性模型根据需求（demand）预测出清价格

In [None]:
# 创建线性回归模型对象
# Simple linear model that uses demand as its only feature.
demand_train = X_train[["demand"]]
demand_test = X_test[["demand"]]

linear_model = LinearRegression()
linear_model.fit(demand_train, y_train)

# y_train is a single-column frame, so the prediction is flattened to 1-D.
linear_pred = linear_model.predict(demand_test).flatten()

# 模型融合

In [26]:
# Weighted blend of the two models: 70% linear regression, 30% LightGBM.
linear_weight, lgb_weight = 0.7, 0.3
y_pred = linear_weight * linear_pred + lgb_weight * lgb_pred

# 模拟鸭子曲线

In [None]:

# 按照长度将y_pred分成三段进行修正，模拟鸭子曲线
# Split y_pred into three equal-length consecutive segments and shrink each by
# its own factor (the author's "duck curve" correction).
# NOTE: this cell scales y_pred in place and then turns it into strings, so it
# can only be run once per prediction.
n = len(y_pred)
segment1_end = n // 3
segment2_end = 2 * n // 3

segment_factors = (
    (slice(None, segment1_end), 0.95),
    (slice(segment1_end, segment2_end), 0.9),
    (slice(segment2_end, None), 0.85),
)
for segment, factor in segment_factors:
    y_pred[segment] *= factor

# Format every prediction as a string with four decimal places.
y_pred = list(map("{:.4f}".format, y_pred))

# 将每小时内的四个时段统一为该小时的最小值

In [None]:
# y_pred holds formatted strings at this point. A minimum taken on strings is
# lexicographic (e.g. "99.0000" sorts after "100.0000"), so the predictions are
# converted back to numbers before any comparison.
sample_submit["clearing price (CNY/MWh)"] = pd.to_numeric(y_pred)


def replace_with_min(group):
    """Return a Series shaped like ``group`` whose entries all equal its minimum."""
    min_value = group.min()
    return group.apply(lambda x: min_value)


# Group rows by index // 4 (four consecutive index values per group) and set
# every row of a group to the group's minimum.
# NOTE(review): this relies on the index being consecutive integers that start
# at a multiple of 4 - confirm for the submission frame.
hourly_min = (
    sample_submit['clearing price (CNY/MWh)']
    .groupby(sample_submit.index // 4)
    .transform(replace_with_min)
)
# Restore the four-decimal string format used for the submission values.
sample_submit['clearing price (CNY/MWh)'] = hourly_min.map("{:.4f}".format)


# 考虑部分节假日特征

In [None]:
## 确保 'day' 列为日期类型
# Make sure 'day' is a datetime column.
# NOTE(review): this also changes how 'day' is written to the final CSV -
# confirm that the submission format accepts it.
sample_submit['day'] = pd.to_datetime(sample_submit['day'])

# Ten days starting 2024-02-09 (the lower bound is exclusive, the upper bound
# inclusive).
start_date = pd.to_datetime('2024-02-08')
end_date = start_date + timedelta(days=10)
in_window = (sample_submit['day'] > start_date) & (sample_submit['day'] <= end_date)
filtered_df = sample_submit[in_window]

# Keep the slots whose 'time' string lies between '10:15' and '16:00'. This is
# a string comparison. The explicit copy makes the assignment below act on an
# independent frame rather than on a slice of sample_submit.
filtered_df = filtered_df[filtered_df['time'].between('10:15', '16:00')].copy()
filtered_df['clearing price (CNY/MWh)'] = pd.to_numeric(filtered_df['clearing price (CNY/MWh)'], errors='coerce')

# For each day: set the whole midday window to -80, then set the four rows
# starting at the day's highest prediction to half of that maximum.
for day, day_data in filtered_df.groupby('day'):
    max_idx = day_data['clearing price (CNY/MWh)'].idxmax()
    max_value = day_data.loc[max_idx, 'clearing price (CNY/MWh)']

    sample_submit.loc[day_data.index, 'clearing price (CNY/MWh)'] = -80

    # Only touch labels that exist: assigning through .loc to a missing label
    # would silently append a new row to the submission.
    peak_rows = [max_idx + i for i in range(0, 4) if max_idx + i in sample_submit.index]
    sample_submit.loc[peak_rows, 'clearing price (CNY/MWh)'] = max_value * 0.5

# # 筛选出2023年12月31日之后的两天的数据
# start_date = pd.to_datetime('2023-12-31')
# end_date = start_date + timedelta(days=2)
# filtered_df = sample_submit[(sample_submit['day'] > start_date) & (sample_submit['day'] <= end_date)]

# # 进一步筛选出时间为10:00到15:00的数据
# filtered_df = filtered_df[filtered_df['time'].between('10:15', '15:00')]

# filtered_df['clearing price (CNY/MWh)'] = pd.to_numeric(filtered_df['clearing price (CNY/MWh)'], errors='coerce')
# # 对于每一天，找到最大值并进行调整
# for day in filtered_df['day'].unique():
#     day_data = filtered_df[filtered_df['day'] == day]
#     if not day_data.empty:
#         max_idx = day_data['clearing price (CNY/MWh)'].idxmax()
#         max_value = day_data.loc[max_idx, 'clearing price (CNY/MWh)'] 
#         # 将其他值设为-80
#         sample_submit.loc[day_data.index, 'clearing price (CNY/MWh)'] = -80
#         # 保留最大值
#         for i in range(0,4):
#             sample_submit.loc[max_idx+i, 'clearing price (CNY/MWh)'] = max_value*0.5           

# # 筛选出2024年4月1日之后的三天的数据
# start_date = pd.to_datetime('2024-4-1')
# end_date = start_date + timedelta(days=3)
# filtered_df = sample_submit[(sample_submit['day'] > start_date) & (sample_submit['day'] <= end_date)]

# # 进一步筛选出时间为10:00到15:00的数据
# filtered_df = filtered_df[filtered_df['time'].between('10:15', '15:00')]

# filtered_df['clearing price (CNY/MWh)'] = pd.to_numeric(filtered_df['clearing price (CNY/MWh)'], errors='coerce')
# # 对于每一天，找到最大值并进行调整
# for day in filtered_df['day'].unique():
#     day_data = filtered_df[filtered_df['day'] == day]
#     if not day_data.empty:
#         max_idx = day_data['clearing price (CNY/MWh)'].idxmax()
#         max_value = day_data.loc[max_idx, 'clearing price (CNY/MWh)'] 
#         # 将其他值设为-80
#         sample_submit.loc[day_data.index, 'clearing price (CNY/MWh)'] = -80
#         # 保留最大值
#         for i in range(0,4):
#             sample_submit.loc[max_idx+i, 'clearing price (CNY/MWh)'] = max_value
            
# # 筛选出2023年9月28日之后的两天的数据 中秋节
# start_date = pd.to_datetime('2023-9-28')
# end_date = start_date + timedelta(days=2)
# filtered_df = sample_submit[(sample_submit['day'] > start_date) & (sample_submit['day'] <= end_date)]

# # 进一步筛选出时间为10:00到15:00的数据
# filtered_df = filtered_df[filtered_df['time'].between('10:15', '15:00')]

# filtered_df['clearing price (CNY/MWh)'] = pd.to_numeric(filtered_df['clearing price (CNY/MWh)'], errors='coerce')
# # 对于每一天，找到最大值并进行调整
# for day in filtered_df['day'].unique():
#     day_data = filtered_df[filtered_df['day'] == day]
#     if not day_data.empty:
#         max_idx = day_data['clearing price (CNY/MWh)'].idxmax()
#         max_value = day_data.loc[max_idx, 'clearing price (CNY/MWh)'] 
#         # 将其他值设为-80
#         sample_submit.loc[day_data.index, 'clearing price (CNY/MWh)'] = -80
#         # 保留最大值
#         for i in range(0,4):
#             sample_submit.loc[max_idx+i, 'clearing price (CNY/MWh)'] = -80


## 保存结果为submit.csv

In [30]:
          
# Write the final predictions to submit.csv (UTF-8, without the index column).
sample_submit.to_csv("submit.csv", index=False, encoding='utf-8')