In [None]:
import pandas as pd
from torch.utils.data import DataLoader
import os

In [None]:
# Mini-batch size handed to the torch DataLoader objects built in the
# "Dataset Generator" section of this notebook.
batch_size = 16

# Wind Data
https://data.dtu.dk/articles/dataset/Data-Driven_Wind_Power_Forecast_SEST2021_/13286336?file=27527084

In [None]:
# Reshape the wide persistence-forecast table (one Time_i / Forecasted_i /
# Observed_i column triplet per horizon i) into a long, time-indexed series of
# observed values and store the selected date range as wind_1.csv.
df = pd.read_csv("../dataset/wind/Persistence_PRE05_H60_mn.csv")

# Drop a leading unnamed column if there is one. pandas renames a blank header
# cell to "Unnamed: <n>" while reading, so checking only for '' / NaN can never
# match; the "Unnamed:" prefix is what actually shows up.
first_col = df.columns[0]
if first_col == '' or pd.isna(first_col) or str(first_col).startswith("Unnamed:"):
    df = df.drop(df.columns[0], axis=1)

columns = df.columns.tolist()
num_timepoints = len([col for col in columns if col.startswith('Time_')])

# Keep only the horizons for which the complete column triplet exists.
available = set(columns)
horizons = [
    i for i in range(num_timepoints)
    if {f'Time_{i}', f'Forecasted_{i}', f'Observed_{i}'} <= available
]
if not horizons:
    raise ValueError("No complete Time_/Forecasted_/Observed_ column triplet found")

# Flatten row by row (C order), which yields the same record order as looping
# over rows and then over horizons, without per-cell Python overhead.
result_df = pd.DataFrame({
    'timestamp': df[[f'Time_{i}' for i in horizons]].to_numpy().ravel(),
    'forecasted': df[[f'Forecasted_{i}' for i in horizons]].to_numpy().ravel(),
    'observed': df[[f'Observed_{i}' for i in horizons]].to_numpy().ravel(),
})

# Records without a timestamp are discarded; missing values become 0.
result_df = result_df.dropna(subset=['timestamp'])
result_df = result_df.fillna({'forecasted': 0, 'observed': 0})

# Sort on the parsed datetime (not the raw string) with a stable algorithm so
# the index is chronologically monotonic, which label slicing relies on, and
# ties keep their original record order.
result_df['time'] = pd.to_datetime(result_df['timestamp'])
result_df = result_df.sort_values('time', kind='stable').set_index('time')

# After this line result_df is the 'observed' Series for the selected range.
result_df = result_df.loc["2019-10-22":"2019-12-31 00:00:01", "observed"]
result_df.to_csv("../preprocessed/preprocessed/wind_1.csv", index=True, header=True)

In [None]:
# Reshape the wide persistence-forecast table (one Time_i / Forecasted_i /
# Observed_i column triplet per horizon i) into a long, time-indexed series of
# observed values, de-duplicate it and store the selected range as wind_2.csv.
df = pd.read_csv("../dataset/wind/Persistence_PRE03_H60_m.csv")

# Drop a leading unnamed column if there is one. pandas renames a blank header
# cell to "Unnamed: <n>" while reading, so checking only for '' / NaN can never
# match; the "Unnamed:" prefix is what actually shows up.
first_col = df.columns[0]
if first_col == '' or pd.isna(first_col) or str(first_col).startswith("Unnamed:"):
    df = df.drop(df.columns[0], axis=1)

columns = df.columns.tolist()
num_timepoints = len([col for col in columns if col.startswith('Time_')])

# Keep only the horizons for which the complete column triplet exists.
available = set(columns)
horizons = [
    i for i in range(num_timepoints)
    if {f'Time_{i}', f'Forecasted_{i}', f'Observed_{i}'} <= available
]
if not horizons:
    raise ValueError("No complete Time_/Forecasted_/Observed_ column triplet found")

# Flatten row by row (C order), which yields the same record order as looping
# over rows and then over horizons, without per-cell Python overhead.
result_df = pd.DataFrame({
    'timestamp': df[[f'Time_{i}' for i in horizons]].to_numpy().ravel(),
    'forecasted': df[[f'Forecasted_{i}' for i in horizons]].to_numpy().ravel(),
    'observed': df[[f'Observed_{i}' for i in horizons]].to_numpy().ravel(),
})

# Records without a timestamp are discarded; missing values become 0.
result_df = result_df.dropna(subset=['timestamp'])
result_df = result_df.fillna({'forecasted': 0, 'observed': 0})

# Sort on the parsed datetime (not the raw string) with a stable algorithm so
# the index is chronologically monotonic, which label slicing relies on. The
# stable sort also makes keep="first" below deterministic: among equal
# timestamps the record from the earliest source row wins.
result_df['time'] = pd.to_datetime(result_df['timestamp'])
result_df = result_df.sort_values('time', kind='stable').set_index('time')

# After this line result_df is the 'observed' Series for the selected range.
result_df = result_df.loc["2019-9-25":"2019-10-8 00:00:01", "observed"]
result_df = result_df[~result_df.index.duplicated(keep="first")]
result_df.to_csv("../preprocessed/preprocessed/wind_2.csv", index=True, header=True)

# PV-Live
https://zenodo.org/records/15013388

In [None]:
# Read the twelve monthly tab-separated files tng00010_2023-MM.tsv and merge
# them, in calendar order, into a single CSV.
dataframes = []
for month in range(1, 13):
    month_str = f"{month:02d}"
    filename = f"../dataset/pv/tng00010_2023-{month_str}.tsv"
    df = pd.read_csv(filename, sep='\t')
    dataframes.append(df)

# Concatenate and write once, after every month has been loaded. Doing this
# inside the loop rebuilt and rewrote the growing file on each iteration while
# producing the same final output.
combined_df = pd.concat(dataframes, ignore_index=True)
combined_df.to_csv("../preprocessed/preprocessed/pv.csv", index=False, encoding='utf-8')

# WPuQ
https://zenodo.org/records/5642902

In [None]:
# Load the table stored under NO_PV/SFH10/HEATPUMP, discard rows that contain
# missing values, and replace the index by its datetime interpretation (the
# raw index is read as seconds since the Unix epoch).
df = (
    pd.read_hdf('../dataset/electric/2018_data_1min.hdf5', key="NO_PV/SFH10/HEATPUMP")
    .dropna()
    .assign(time=lambda frame: pd.to_datetime(frame.index, unit="s"))
    .set_index("time")
)

# Keep the rows inside the selected date range and export them with the time
# index as the first CSV column.
df = df["2018-05-06":"2018-12-31 00:00:00"]
df.to_csv("../preprocessed/preprocessed/electric.csv", index=True)

# MPVBench
https://github.com/KIT-IAI/MPVBench/tree/main?tab=readme-ov-file

In [None]:
# Load the 15-minute power table, parse its "time" column and use it as index.
df = (
    pd.read_csv("../dataset/p_watt_15min.csv")
    .assign(time=lambda frame: pd.to_datetime(frame["time"]))
    .set_index("time")
)

# Pairwise correlation between the remaining columns (shown as cell output).
df.corr()

# Special Waveform

In [None]:
from ResampleGAN.utils.WaveformGenerator import create_special_wave_dataset

# Generate one synthetic CSV per waveform shape; all of them share the same
# length, frequency and amplitude settings.
# NOTE(review): 105120 = 365 * 288, presumably one year of 5-minute samples;
# confirm against the generator's implementation.
for wave_type in ("triangle", "square", "sawtooth", "line", "sine"):
    create_special_wave_dataset(
        f"../dataset/special_wave_{wave_type}.csv",
        length=105120,
        freq=400,
        amplitude=1.0,
        wave_type=wave_type,
    )

# Dataset Generator

In [None]:
from ResampleGAN.utils.DatasetGenerator import DatasetGenerator, get_aligned_input_output

In [None]:
# Load the synthetic "line" waveform, index it by parsed time and keep only
# the "value" column.
df = (
    pd.read_csv("../dataset/special_wave_line.csv")
    .assign(time=lambda frame: pd.to_datetime(frame["time"]))
    .set_index("time")[["value"]]
)

# Input and output sampling intervals, shared by the alignment step and the
# dataset constructor.
resolution = {"s_in": "15min", "s_out": "5min"}
df_input, df_output = get_aligned_input_output(df, **resolution)

# NOTE(review): 97 steps at 15min and 289 steps at 5min both span 24 hours
# when both end points are counted; presumably one-day windows, confirm
# against DatasetGenerator.
dataset = DatasetGenerator(
    df_input=df_input,
    df_output=df_output,
    input_length=97,
    output_length=289,
    use_window=True,
    **resolution,
)

# split_dataset's result is unpacked in the order train, test, valid.
train_dataset, test_dataset, valid_dataset = DatasetGenerator.split_dataset(dataset)

# Only the training loader shuffles its samples.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)
valid_loader = DataLoader(valid_dataset, shuffle=False, batch_size=batch_size)