In [26]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [27]:
# Load the Elia grid-load CSV.
# Forward slashes are used so the relative path resolves on Windows, Linux and
# macOS alike (a backslash-separated path is only valid on Windows).
data = pd.read_csv('dataset_origin/NL/NL_elia.csv')

data.dtypes

Datetime            object
Resolution code     object
Elia Grid Load     float64
dtype: object

In [28]:
# Count the missing values in the 'Datetime' column.
data['Datetime'].isna().sum()

0

In [29]:
# Parse 'Datetime' into timezone-aware UTC timestamps, then keep only the
# calendar date (the time-of-day component is discarded).
utc_timestamps = pd.to_datetime(data['Datetime'], utc=True)
data['Datetime'] = utc_timestamps.dt.date
# Preview the last 10 rows.
data.tail(10)

Unnamed: 0,Datetime,Resolution code,Elia Grid Load
35218,2024-03-31,PT15M,5973.219
35219,2024-03-31,PT15M,6058.357
35220,2024-03-31,PT15M,6214.066
35221,2024-03-31,PT15M,6301.227
35222,2024-03-31,PT15M,6423.219
35223,2024-03-31,PT15M,6608.277
35224,2024-03-30,PT15M,6786.324
35225,2024-03-30,PT15M,6875.07
35226,2024-03-30,PT15M,6998.226
35227,2024-03-30,PT15M,7129.384


In [40]:
# Drop every row dated 2024-03-30; dates are compared through their string form.
# NOTE(review): presumably this removes a partial day left over after the UTC
# conversion -- confirm against the raw file.
is_excluded_day = data['Datetime'].astype(str).eq('2024-03-30')
data = data[~is_excluded_day]
data.tail(10)

Unnamed: 0,Datetime,Resolution code,Elia Grid Load
35214,2024-03-31,PT15M,5839.451
35215,2024-03-31,PT15M,5888.431
35216,2024-03-31,PT15M,5802.283
35217,2024-03-31,PT15M,5900.483
35218,2024-03-31,PT15M,5973.219
35219,2024-03-31,PT15M,6058.357
35220,2024-03-31,PT15M,6214.066
35221,2024-03-31,PT15M,6301.227
35222,2024-03-31,PT15M,6423.219
35223,2024-03-31,PT15M,6608.277


In [30]:
import os
from sklearn.preprocessing import MinMaxScaler
import torch

In [31]:
# Device and hyper-parameters.
device = torch.device('cpu')
learning_rate = 1e-3
output_folder = 'NL'
# Number of consecutive readings per sample: (window_size - 1) lagged inputs
# followed by 1 target value.
window_size = 14
data_close = data['Elia Grid Load']
# Reverse the row order before windowing.
# NOTE(review): the preview of `data` lists later dates first, so this
# presumably restores chronological order -- confirm.
reversed_data_close = list(data_close[::-1])

# Output directory for the generated data sets.
output_dir = f'data_exp/{output_folder}'

# Create the directory if it does not exist yet.
os.makedirs(output_dir, exist_ok=True)

# Build stride-1 sliding windows of `window_size` consecutive readings.
# NOTE(review): the 'Resolution code' column shows PT15M, so each step looks
# like a 15-minute reading rather than a day, despite the "before X day"
# column labels -- confirm.
time_series_list = [
    reversed_data_close[start:start + window_size]
    for start in range(len(reversed_data_close) - window_size + 1)
]

# Column labels: "before <window_size - 1> day" ... "before 1 day", then "target".
columns = [f"before {lag} day" for lag in range(window_size - 1, 0, -1)] + ['target']
data_need = pd.DataFrame(time_series_list, columns=columns)
# Conversion helper for numeric strings that contain comma separators.
def convert_to_float(x):
    """Convert ``x`` to a float, ignoring commas in its string form.

    Args:
        x: Any value; it is stringified and every ``','`` is removed before
            it is passed to ``float``.

    Returns:
        The parsed float, or ``x`` unchanged when it cannot be parsed.
    """
    try:
        return float(str(x).replace(',', ''))
    except (TypeError, ValueError):
        # Only conversion failures are tolerated; anything else (for example
        # KeyboardInterrupt) must propagate instead of being swallowed.
        return x

# Convert every cell of the DataFrame to float.
# DataFrame.applymap is deprecated (pandas >= 2.1) in favour of DataFrame.map;
# fall back to applymap on older pandas versions that lack DataFrame.map.
if hasattr(pd.DataFrame, 'map'):
    data_need = data_need.map(convert_to_float)
else:
    data_need = data_need.applymap(convert_to_float)

# Separate the features (all columns but the last) from the target (last column).
x = data_need.values[:, :-1]
y = data_need.values[:, -1].reshape(-1, 1)

# Sequential split sizes (no shuffling): first 80% train, next 10% test,
# remainder validation.
total_samples = len(data_need)
train_size = int(0.8 * total_samples)
test_size = int(0.1 * total_samples)
val_size = total_samples - train_size - test_size

# Min-max normalisation.
# The scalers are fitted on the training rows only, so the min/max of the
# test and validation rows do not leak into the transform. As a consequence,
# test/validation values may fall slightly outside [0, 1].
# If the training slice would be empty, fall back to fitting on all rows.
fit_rows = train_size or total_samples
ss_input = MinMaxScaler().fit(x[:fit_rows])
ss_output = MinMaxScaler().fit(y[:fit_rows])
x = ss_input.transform(x)
y = ss_output.transform(y)
data_need = pd.concat([pd.DataFrame(x), pd.DataFrame(y)], axis=1)

# Slice the three data sets in order.
train_set = data_need[:train_size].reset_index(drop=True)
test_set = data_need[train_size:train_size + test_size].reset_index(drop=True)
val_set = data_need[train_size + test_size:].reset_index(drop=True)

# Save the data sets into the directory created for them (`output_dir`).
train_set.to_csv(f'{output_dir}/train_set.csv', index=False)
test_set.to_csv(f'{output_dir}/test_set.csv', index=False)
val_set.to_csv(f'{output_dir}/val_set.csv', index=False)

  data_need = data_need.applymap(convert_to_float)
