In [1]:
import pandas as pd
import numpy as np
from math import pi
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.model_selection import TimeSeriesSplit

import lightgbm as lgb
from lightgbm import LGBMRegressor
import xgboost as xgb
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
import optuna

from sklearn.model_selection import KFold, train_test_split,RepeatedKFold
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

import warnings
warnings.simplefilter("ignore")

# 数据导入

In [2]:
all_df = pd.read_csv('../data/electricity price.csv')
unit = pd.read_csv('../data/unit.csv')

In [3]:
submit = all_df[all_df['clearing price (CNY/MWh)'].isnull()]
test_result = submit.copy()
ABM = submit.copy()

# 异常值处理

In [4]:
sigma = (
    all_df["clearing price (CNY/MWh)"].mean() + 3 * all_df["clearing price (CNY/MWh)"].std() 
)
high_abnormal_mask = all_df[all_df["clearing price (CNY/MWh)"] > (sigma-150)]

In [5]:
nsmallest_idx = high_abnormal_mask.index
nsmallest_idx

Int64Index([  356,   357,   358,   359,   644,   645,   646,   647,   736,
              737,
            ...
            50102, 50103, 50104, 50105, 50106, 50107, 50108, 50109, 50110,
            50111],
           dtype='int64', length=780)

In [6]:
for n in list(nsmallest_idx):
    all_df['clearing price (CNY/MWh)'].iloc[n] = all_df["clearing price (CNY/MWh)"].mean()

In [7]:
all_df["day"] = pd.to_datetime(
    all_df["day"] + " " + all_df["time"].str.replace("24:00:00", "00:00"))
mask = all_df['day'].dt.time == pd.Timestamp('00:00:00').time()
# 需要将这些行的日期部分加一天
all_df.loc[mask, 'day'] += pd.Timedelta(days=1)

# 时间特征

In [8]:
date_col = ['day']
for _col in date_col:
    date_col = pd.to_datetime(all_df[_col], errors='coerce')
    all_df["年"] = date_col.dt.year.fillna(-1)
    all_df["月"] = date_col.dt.month.fillna(-1)
    all_df["星期"] = date_col.dt.weekday.fillna(-1)
    all_df["日"] = date_col.dt.day.fillna(-1)
    all_df["周/年"] = date_col.dt.isocalendar().week.fillna(-1).astype(int)
    all_df['周末'] = all_df['星期'].isin([5, 6]).astype(int)
    all_df['风季'] = all_df['月'].isin([1, 2, 3, 4, 5, 9, 10, 11, 12]).astype(int)
    #all_df['负电价'] = all_df['月'].isin([1, 2, 3, 4, 5,]).astype(int)
    # all_df['高电价'] = all_df['月'].isin([1, 4, 8, 12]).astype(int)
    
    all_df["季度"] = date_col.dt.quarter.fillna(-1)

#     all_df["月初"] = date_col.dt.is_month_start.astype(int).fillna(-1)
#     all_df["月末"] = date_col.dt.is_month_end.astype(int).fillna(-1)
#     all_df["季度初"] = date_col.dt.is_quarter_start.astype(int).fillna(-1)
#     all_df["季度末"] = date_col.dt.is_quarter_end.astype(int).fillna(-1)
       
    all_df[_col + '_month_sin'] = all_df["月"] * np.sin(2 * np.pi * all_df["月"]/12)
    all_df[_col + '_month_cos'] = all_df["月"] * np.cos(2 * np.pi * all_df["月"]/12)
    all_df[_col + '_day_sin'] = all_df["日"] * np.sin(2 * np.pi * all_df["日"]/30)
    all_df[_col + '_day_cos'] = all_df["日"] * np.cos(2 * np.pi * all_df["日"]/30)

In [9]:
date_col = ['time']
for _col in date_col:
    date_col = pd.to_datetime(all_df[_col], errors='coerce')
    all_df["小时"] = date_col.dt.hour.fillna(-1)
    all_df["分钟"] = date_col.dt.minute.fillna(-1)
#     all_df[_col + '_hour_sin'] = all_df["小时"] * np.sin(2 * np.pi * all_df["小时"]/6)
#     all_df[_col + '_hour_cos'] = all_df["小时"] * np.cos(2 * np.pi * all_df["小时"]/6)
#     all_df[_col + '_day_sin'] = all_df["分钟"] * np.sin(2 * np.pi * all_df["分钟"]/15)
#     all_df[_col + '_day_cos'] = all_df["分钟"] * np.cos(2 * np.pi * all_df["分钟"]/15)

    all_df['低谷期'] = all_df['小时'].isin([10, 11, 12, 13, 14, 15]).astype(int)
    all_df['高谷期'] = all_df['小时'].isin([17, 18, 19, 20]).astype(int)


In [10]:
all_df = pd.get_dummies(
    data=all_df,       
    columns=["小时", "日", "月", "年", "星期"],  
    drop_first=True         # 删除第一列以避免多重共线性
)

# 节假日

In [11]:
def generate_holiday_dates(start_dates, duration):
    holidays = []  
    for start_date in start_dates:  
        holidays.extend(pd.date_range(start=start_date, periods=duration).tolist())
    return holidays  

spring_festival_start_dates = ["2022-01-30", "2023-01-20", "2024-02-9"]
labor_start_dates = ["2022-04-30", "2023-04-29"]
yuandan = ["2022-1-1", "2023-1-1", "2024-1-1"]
Tomb_sweeping_Day = ["2022-4-5", "2023-4-5", "2024-4-4"]
spring_festivals = generate_holiday_dates(spring_festival_start_dates, 7)
labor = generate_holiday_dates(labor_start_dates, 5)

all_df['day'] =pd.to_datetime(all_df['day'])
spring_festivals =pd.to_datetime(spring_festivals)
labor =pd.to_datetime(labor)

print(spring_festivals)
print(labor)

DatetimeIndex(['2022-01-30', '2022-01-31', '2022-02-01', '2022-02-02',
               '2022-02-03', '2022-02-04', '2022-02-05', '2023-01-20',
               '2023-01-21', '2023-01-22', '2023-01-23', '2023-01-24',
               '2023-01-25', '2023-01-26', '2024-02-09', '2024-02-10',
               '2024-02-11', '2024-02-12', '2024-02-13', '2024-02-14',
               '2024-02-15'],
              dtype='datetime64[ns]', freq=None)
DatetimeIndex(['2022-04-30', '2022-05-01', '2022-05-02', '2022-05-03',
               '2022-05-04', '2023-04-29', '2023-04-30', '2023-05-01',
               '2023-05-02', '2023-05-03'],
              dtype='datetime64[ns]', freq=None)


In [12]:
all_df["春节"] = all_df['day'].isin(['2022-01-30', '2022-01-31', '2022-02-01', '2022-02-02',
               '2022-02-03', '2022-02-04', '2022-02-05', '2023-01-20',
               '2023-01-21', '2023-01-22', '2023-01-23', '2023-01-24',
               '2023-01-25', '2023-01-26', '2024-02-09', '2024-02-10',
               '2024-02-11', '2024-02-12', '2024-02-13', '2024-02-14',
               '2024-02-15'])
all_df["劳动节"] = all_df['day'].isin(['2022-04-30', '2022-05-01', '2022-05-02', '2022-05-03',
               '2022-05-04', '2023-04-29', '2023-04-30', '2023-05-01',
               '2023-05-02', '2023-05-03'])
all_df["元旦"] = all_df['day'].isin(["2022-1-1", "2023-1-1", "2024-1-1"])
all_df["清明"] = all_df['day'].isin(["2022-4-3","2022-4-4","2022-4-5","2023-4-5","2024-4-4","2024-4-5","2024-4-6"])

# 滑窗统计

In [13]:
def cal_range(x):

    return x.max() - x.min()

def increase_num(x):

    return (x.diff() > 0).sum()

def decrease_num(x):

    return (x.diff() < 0).sum()

def increase_mean(x):

    diff = x.diff()
    return diff[diff > 0].mean()

def decrease_mean(x):

    diff = x.diff()
    return diff[diff < 0].abs().mean()

def increase_std(x):

    diff = x.diff()
    return diff[diff > 0].std()

def decrease_std(x):

    diff = x.diff()
    return diff[diff < 0].std()



In [14]:
from tqdm import tqdm  

window_sizes = [4,6,12,24]#16?

with tqdm(window_sizes) as pbar:
    for window_size in pbar:

        functions = ["mean", "std", "min", "max", cal_range, increase_num,
                     decrease_num, increase_mean, decrease_mean, increase_std, decrease_std,
                     #increase_nunique,decrease_nunique,increase_jfg,decrease_jfg
                    ]

        for func in functions:
            func_name = func if type(func) == str else func.__name__

            column_name = f"demand_rolling_{window_size}_{func_name}"

            all_df[column_name] = all_df["demand"].rolling(
                window=window_size,        
                min_periods=window_size//2,  
                closed="left"         
            ).agg(func)             

            pbar.set_postfix({"window_size": window_size, "func": func_name})


100%|██████████| 4/4 [05:16<00:00, 79.22s/it, window_size=24, func=decrease_std] 


# 历史值特征和滞后特征

In [15]:
all_df["demand_diff_1"] = all_df["demand"].diff(1)
all_df["demand_pct_1"] = all_df["demand"].pct_change(1)

In [16]:
all_df["demand-1"] = all_df["demand"].shift(1) - all_df["demand"]
all_df["demand-8"] = all_df["demand"].shift(8) - all_df["demand"]

# unit特征构建-ABM

In [17]:
sorted_unit = unit.sort_values("coal consumption (g coal/KWh)")  
sorted_unit.head()

Unnamed: 0,unit ID,Capacity（MW）,utilization hour (h),coal consumption (g coal/KWh),power consumption rate (%)
114,115,60.0,3318.0,63.0,0.0
238,239,25.0,6486.0,73.0,5.33
148,149,30.0,3531.0,74.0,3.61
149,150,30.0,3531.0,74.0,8.72
150,151,30.0,3531.0,74.0,4.07


In [18]:
# 预先计算 sorted_unit 的累积和
sorted_unit['cumulative_capacity'] = sorted_unit['Capacity（MW）'].cumsum()

prices = []
holding_capacities = []

# 找到最后一个满足总需求的机组报价
for demand in all_df["demand"]:
    price = sorted_unit[sorted_unit['cumulative_capacity'] >= demand]["coal consumption (g coal/KWh)"].iloc[0]
    holding_capacity = sorted_unit[sorted_unit['cumulative_capacity'] >= demand]["cumulative_capacity"].iloc[0] - demand
    prices.append(price)
    holding_capacities.append(holding_capacity)

print(len(prices))
prices[:5]

83520


[279.0, 279.0, 279.0, 279.0, 279.0]

In [19]:
price = pd.DataFrame(prices,columns=['price'])
price

Unnamed: 0,price
0,279.00
1,279.00
2,279.00
3,279.00
4,279.00
...,...
83515,277.32
83516,277.32
83517,277.00
83518,275.45


In [20]:
electricity_price = pd.read_csv("../data/electricity price.csv")
feats_electricity = [f for f in electricity_price.columns if f not in ['day', 'time','clearing price (CNY/MWh)']]
electricity = electricity_price[feats_electricity]

electricity

Unnamed: 0,demand
0,40334.18
1,40523.15
2,40374.74
3,40111.55
4,40067.50
...,...
83515,35861.48
83516,35862.24
83517,35005.55
83518,34174.05


In [21]:
model = XGBRegressor(booster="gblinear",learning_rate=0.1,n_estimators=500,random_state=42,feature_selector='shuffle')
# 55392为训练集的长度
train_length = 55392

feature = pd.concat([price[:train_length],electricity[:train_length]],axis=1)
X_electricity = feature
y_electricity = electricity_price["clearing price (CNY/MWh)"].iloc[:train_length].values.reshape(-1, 1)
model.fit(X_electricity, y_electricity)
model.coef_, model.intercept_

(array([0.219885 , 0.0071276]), array([44.6626]))

In [22]:
test_electricity = pd.concat([price[train_length:],electricity[train_length:]],axis=1,)

In [23]:
y_pred = model.predict(test_electricity)
y_pred = y_pred.flatten() 

In [24]:
ABM["clearing price (CNY/MWh)"] = y_pred * 0.95
ABM.to_csv("../data/ABM.csv", index=False)
ABM

Unnamed: 0,day,time,demand,clearing price (CNY/MWh)
55392,2023/7/1,0:15,41975.36,385.923218
55393,2023/7/1,0:30,41548.30,382.922882
55394,2023/7/1,0:45,41567.46,383.052612
55395,2023/7/1,1:00,41184.25,380.261475
55396,2023/7/1,1:15,41373.79,381.741241
...,...,...,...,...
83515,2024/4/18,23:00,35861.48,343.660004
83516,2024/4/18,23:15,35862.24,343.665131
83517,2024/4/18,23:30,35005.55,337.797485
83518,2024/4/18,23:45,34174.05,331.843414


# Stacking-LGB+XGB

In [25]:
all_df = pd.concat([all_df,price],axis=1)

In [26]:
feature_select = all_df.drop(columns=['day','time']).columns

In [27]:
train = all_df[~all_df['clearing price (CNY/MWh)'].isnull()][feature_select]
test = all_df[all_df['clearing price (CNY/MWh)'].isnull()][feature_select]
train

Unnamed: 0,demand,clearing price (CNY/MWh),周/年,周末,风季,季度,day_month_sin,day_month_cos,day_day_sin,day_day_cos,...,demand_rolling_24_decrease_num,demand_rolling_24_increase_mean,demand_rolling_24_decrease_mean,demand_rolling_24_increase_std,demand_rolling_24_decrease_std,demand_diff_1,demand_pct_1,demand-1,demand-8,price
0,40334.18,350.80,48,0,1,4,-2.939152e-15,12.000000,2.079117e-01,0.978148,...,,,,,,,,,,279.00
1,40523.15,350.80,48,0,1,4,-2.939152e-15,12.000000,2.079117e-01,0.978148,...,,,,,,188.97,0.004685,-188.97,,279.00
2,40374.74,350.80,48,0,1,4,-2.939152e-15,12.000000,2.079117e-01,0.978148,...,,,,,,-148.41,-0.003662,148.41,,279.00
3,40111.55,350.80,48,0,1,4,-2.939152e-15,12.000000,2.079117e-01,0.978148,...,,,,,,-263.19,-0.006519,263.19,,279.00
4,40067.50,348.93,48,0,1,4,-2.939152e-15,12.000000,2.079117e-01,0.978148,...,,,,,,-44.05,-0.001098,44.05,,279.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55387,48680.25,518.65,26,0,0,2,7.347881e-16,-6.000000,-3.399323e-14,30.000000,...,11.0,534.231667,393.806364,314.433305,250.903857,-1060.29,-0.021316,1060.29,712.05,288.26
55388,47981.79,493.87,26,0,0,2,7.347881e-16,-6.000000,-3.399323e-14,30.000000,...,11.0,534.231667,469.832727,314.433305,313.259254,-698.46,-0.014348,698.46,681.87,288.26
55389,46587.73,493.87,26,0,0,2,7.347881e-16,-6.000000,-3.399323e-14,30.000000,...,12.0,576.959091,488.885000,290.958384,305.885921,-1394.06,-0.029054,1394.06,1952.20,286.01
55390,45266.09,493.87,26,0,0,2,7.347881e-16,-6.000000,-3.399323e-14,30.000000,...,13.0,606.146000,558.513846,289.225304,385.739944,-1321.64,-0.028369,1321.64,3355.02,285.63


In [28]:
X = train.drop(columns=['clearing price (CNY/MWh)'])
y = train['clearing price (CNY/MWh)']

In [29]:
params_lgb = {
    'num_iterations': 2000,
    'loss_function': 'mse',
    'verbose': -1,
    'boosting_type': 'gbdt',
    'objective': 'regression',
    'lambda_l1': 0.11204432911231003, 
    'lambda_l2': 0.002047008056201105, 
    'num_leaves': 134,
    'learning_rate': 0.05,
    'min_data_in_leaf': 20,
    'one_hot_max_size': 2,
    'max_depth': 8,
    "max_bin":48,
    'feature_fraction': 0.8,
    'bagging_fraction': 0.9,
    'n_estimators':800,
    #'random_state':777
}
model_lgb = []

In [30]:
params_xgb = {
    "num_boost_round": 500,
    "learning_rate": 0.05,
    "booster": "gblinear",
    "objective": "reg:squarederror",
    "eval_metric": "rmse",
    "max_leaves": 255,
    "verbosity": 1,
    "nthread": -1,
    'subsample': 0.6341353519865157, 
    'colsample_bytree': 0.9999337605579326, 
    'min_child_weight': 0.7631962938899703,
    "early_stopping_rounds": 200,
    'max_depth':8,
    'random_state':42
}
model_xgb = []

In [31]:
# from tscv import GapWalkForward,GapKFold
# cv = GapKFold(n_splits=5, gap_before=5, gap_after=5)

tscv = TimeSeriesSplit(n_splits=5)
x = X
y = y
mse = 0
rmse = 0
for fold, (train_index, val_index) in enumerate(tscv.split(x, y)):
    logging.info(f'############ fold: {fold} ###########')
    x_train, x_val, y_train, y_val = x.iloc[train_index], x.iloc[val_index], y.iloc[train_index], y.iloc[val_index]
    
    trainset = lgb.Dataset(x_train, y_train)
    valset = lgb.Dataset(x_val, y_val)
    model = lgb.train(params_lgb, 
                      trainset, 
                      valid_sets=[trainset, valset], 
                      callbacks=[lgb.log_evaluation(1000)]
                      )
    model_lgb.append(model)
    lgb_pred = pd.Series(model.predict(x_val, num_iteration=model.best_iteration), index=y_val.index).fillna(0)
    
    trainset = xgb.DMatrix(x_train, y_train, enable_categorical=True, nthread=-1)
    valset = xgb.DMatrix(x_val, y_val, enable_categorical=True, nthread=-1)
    model = xgb.train(params_xgb, 
                      trainset, 
                      evals=[(trainset, 'train'),(valset, 'eval')], 
                      num_boost_round=params_xgb["num_boost_round"], 
                      early_stopping_rounds=params_xgb["early_stopping_rounds"], 
                      verbose_eval=1000
                      )
    model_xgb.append(model)
    xgb_pred = pd.Series(model.predict(valset, iteration_range=(0, model.best_iteration)), index=y_val.index).fillna(0)
    
#     model = RandomForestRegressor(**params_rf).fit(x_train, y_train)
#     model_rf.append(model)
#     rf_pred = pd.Series(model.predict(x_val), index=y_val.index).fillna(0)
    
    val_pred = (lgb_pred + xgb_pred) / 2
    mse += mean_squared_error(y_val.fillna(0), val_pred,squared=True)
    rmse += mean_squared_error(y_val.fillna(0), val_pred,squared=False)

mse = mse / tscv.n_splits
rmse = rmse / tscv.n_splits
score = (mse + rmse) / 2
logging.info(f"--------------score {score}--------------")

2024-08-22 10:20:35,886 : INFO : ############ fold: 0 ###########


Parameters: { "colsample_bytree", "early_stopping_rounds", "max_depth", "max_leaves", "min_child_weight", "num_boost_round", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	train-rmse:187.78778	eval-rmse:202.16678
[219]	train-rmse:135.65467	eval-rmse:175.77727


2024-08-22 10:20:39,864 : INFO : ############ fold: 1 ###########


Parameters: { "colsample_bytree", "early_stopping_rounds", "max_depth", "max_leaves", "min_child_weight", "num_boost_round", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	train-rmse:193.41392	eval-rmse:183.23502
[207]	train-rmse:141.32298	eval-rmse:180.86510


2024-08-22 10:20:47,046 : INFO : ############ fold: 2 ###########


Parameters: { "colsample_bytree", "early_stopping_rounds", "max_depth", "max_leaves", "min_child_weight", "num_boost_round", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	train-rmse:180.96118	eval-rmse:189.34934
[499]	train-rmse:140.87129	eval-rmse:157.57601


2024-08-22 10:20:58,252 : INFO : ############ fold: 3 ###########


Parameters: { "colsample_bytree", "early_stopping_rounds", "max_depth", "max_leaves", "min_child_weight", "num_boost_round", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	train-rmse:184.78346	eval-rmse:197.93719
[205]	train-rmse:143.41374	eval-rmse:208.03477


2024-08-22 10:21:08,566 : INFO : ############ fold: 4 ###########


Parameters: { "colsample_bytree", "early_stopping_rounds", "max_depth", "max_leaves", "min_child_weight", "num_boost_round", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	train-rmse:187.59052	eval-rmse:196.33594
[405]	train-rmse:152.81858	eval-rmse:164.52102


2024-08-22 10:21:22,336 : INFO : --------------score 15173.466110686979--------------


In [32]:
test_df_le = test.drop(columns=['clearing price (CNY/MWh)'])
x_test_b = test_df_le
y_pred = np.zeros((test.shape[0], ))
for i in range(0, tscv.n_splits):
    y_pred += model_lgb[i].predict(x_test_b, num_iteration=model_lgb[i].best_iteration)
    y_pred += model_xgb[i].predict(xgb.DMatrix(x_test_b, enable_categorical=True, nthread=-1), iteration_range=(0, model_xgb[i].best_iteration))
submit_pred1 = y_pred / 2 / tscv.n_splits
submit_pred1


array([362.22285418, 347.96915581, 345.57543765, ..., 416.56619128,
       413.84021398, 370.55562595])

In [33]:
ABM = pd.read_csv('../data/ABM.csv')
ABM = ABM['clearing price (CNY/MWh)']
submit_pred1 = np.array((submit_pred1*1.04 + ABM*0.9) / 2)
submit_pred1 = np.around(submit_pred1, decimals=4)
submit_pred1

array([362.0213, 353.2593, 352.0729, ..., 368.6233, 364.5264, 340.1743])

In [34]:
test_result['clearing price (CNY/MWh)'] = submit_pred1

In [35]:
test_result = test_result.drop([f for f in test_result.columns if f not in ['day', 'time', 'clearing price (CNY/MWh)']],axis=1)
test_result.head()

Unnamed: 0,day,time,clearing price (CNY/MWh)
55392,2023/7/1,0:15,362.0213
55393,2023/7/1,0:30,353.2593
55394,2023/7/1,0:45,352.0729
55395,2023/7/1,1:00,346.0388
55396,2023/7/1,1:15,339.3115


In [36]:
cp = pd.DataFrame(test_result['clearing price (CNY/MWh)'])


for i in tqdm(range(0, len(cp), 4), desc="Processing"):
    # 选取当前四行
    subset = cp.iloc[i:i+4]
    # 找到最小值
    min_value = subset.mean()
    # 将这四行替换为最小值
    cp.iloc[i:i+4] = min_value

test_result['clearing price (CNY/MWh)'] = cp
test_result

Processing: 100%|██████████| 7032/7032 [00:01<00:00, 3765.40it/s]


Unnamed: 0,day,time,clearing price (CNY/MWh)
55392,2023/7/1,0:15,353.348075
55393,2023/7/1,0:30,353.348075
55394,2023/7/1,0:45,353.348075
55395,2023/7/1,1:00,353.348075
55396,2023/7/1,1:15,333.038625
...,...,...,...
83515,2024/4/18,23:00,364.510500
83516,2024/4/18,23:15,361.451775
83517,2024/4/18,23:30,361.451775
83518,2024/4/18,23:45,361.451775


# 输出

In [37]:
import datetime
test_result.to_csv(("../submit/submit_"+datetime.datetime.now().strftime('%Y%m%d_%H%M%S') + ".csv"),index=False)