In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
print(torch.cuda.is_available())

False


In [20]:
df = pd.read_csv('merge_train.csv',parse_dates=['date'])

In [21]:
df['month'] = df['date'].dt.month
df['day'] = df['date'].dt.day
df['weekdays'] = df['date'].dt.dayofweek  # 1 for weekend, 0 for weekday
df['hour'] = df['date'].dt.hour
df['minute'] = df['date'].dt.minute
# df['EMA_4h'] = df['Price'].ewm(span=48, adjust=False).mean()


df['SMA_4h'] = df['Price'].shift(1).rolling(window=48).mean()
# df['EMA_1d'] = df['Price'].ewm(span=288, adjust=False).mean()
df['Forecast_difference_0.5 hour future'] = df['Forecast_Roof_0.5 hour future'] - df['Forecast_Demand_0.5 hour future']
df["Forecast_ratio_0.5 hour future"] = df['Forecast_Roof_0.5 hour future'] / df['Forecast_Demand_0.5 hour future']
df.set_index('date', inplace=True)

In [22]:

df = df.drop(columns=['Region', 'Actual_Roof_OPOWER', 'Actual_Roof_LASTCHANGED'])


In [23]:
from datetime import datetime, time

def create_qld_holidays_list():
    holidays = [
        # 2023
        "2023-01-01", "2023-01-02", "2023-01-26", "2023-04-07", "2023-04-08", "2023-04-09", "2023-04-10",
        "2023-04-25", "2023-05-01", "2023-08-16", "2023-10-02", "2023-12-25", "2023-12-26",
        # 2024
        "2024-01-01", "2024-01-26", "2024-03-29", "2024-03-30", "2024-03-31", "2024-04-01",
        "2024-04-25", "2024-05-06", "2024-08-14", "2024-10-07", "2024-12-25", "2024-12-26",
        # 2025
        "2025-01-01", "2025-01-27", "2025-04-18", "2025-04-19", "2025-04-20", "2025-04-21",
        "2025-04-25", "2025-05-05", "2025-08-13", "2025-10-06", "2025-12-25", "2025-12-26"
    ]
    return pd.to_datetime(holidays)

def add_qld_holidays(df):
    holidays = create_qld_holidays_list()
    
   
    df['is_holiday'] = 0
    
    df.loc[df.index.isin(holidays), 'is_holiday'] = 1
    
    
    return df

In [24]:
df = add_qld_holidays(df)

In [25]:
# df["time_idx"] = np.arange(len(df))
X = df.drop('Price', axis=1)

y = df['Price']


train_end_date = pd.to_datetime("2024-04-30 23:59:59")
val_end_date = pd.to_datetime("2024-05-31 23:59:59")
test_start_date = pd.to_datetime("2024-06-01 00:00:00")


X_train = X[X.index <= train_end_date]
X_val = X[(X.index > train_end_date) & (X.index <= val_end_date)]
X_test = X[X.index >= test_start_date]

y_train = y[y.index <= train_end_date]
y_val = y[(y.index > train_end_date) & (y.index <= val_end_date)]
y_test = y[y.index >= test_start_date]

# train_time_idx = torch.tensor(X_train["time_idx"].values, dtype=torch.float32)
# val_time_idx = torch.tensor(X_val["time_idx"].values, dtype=torch.float32)
# test_time_idx = torch.tensor(X_test["time_idx"].values, dtype=torch.float32)



X_train = torch.tensor(X_train.values, dtype=torch.float32)
X_val = torch.tensor(X_val.values, dtype=torch.float32)
X_test = torch.tensor(X_test.values, dtype=torch.float32)

y_train = torch.tensor(y_train.values, dtype=torch.float32)
y_val = torch.tensor(y_val.values, dtype=torch.float32)
y_test = torch.tensor(y_test.values, dtype=torch.float32)





In [26]:
df.columns

Index(['Demand', 'Price', 'Forecast_Demand_0.5 hour future',
       'Forecast_Demand_1 hour future', 'Forecast_Demand_1.5 hours future',
       'Forecast_Demand_2 hours future', 'Forecast_Demand_2.5 hours future',
       'Forecast_Demand_3 hours future', 'Forecast_Demand_3.5 hours future',
       'Forecast_Demand_4 hours future', 'Forecast_Demand_4.5 hours future',
       'Forecast_Demand_5 hours future', 'Forecast_Demand_5.5 hours future',
       'Forecast_Demand_6 hours future', 'Forecast_Demand_6.5 hours future',
       'Forecast_Demand_7 hours future', 'Forecast_Demand_7.5 hours future',
       'Forecast_Demand_8 hours future', 'Forecast_Demand_8.5 hours future',
       'Forecast_Demand_9 hours future', 'Forecast_Demand_9.5 hours future',
       'Forecast_Roof_0.5 hour future', 'Forecast_Roof_1 hour future',
       'Forecast_Roof_1.5 hours future', 'Forecast_Roof_2 hours future',
       'Forecast_Roof_2.5 hours future', 'Forecast_Roof_3 hours future',
       'Forecast_Roof_3.5 hour

In [9]:
from transformers import TimeSeriesTransformerConfig, TimeSeriesTransformerModel

In [10]:
from transformers import TimeSeriesTransformerConfig, TimeSeriesTransformerModel


config = TimeSeriesTransformerConfig(
    prediction_length=24,  
    context_length=48,  
    num_time_features=X_train.shape[1],  
    num_static_categorical_features=0,  
    num_static_real_features=0, 
    embedding_dimension=16 
)


model = TimeSeriesTransformerModel(config)


In [11]:
past_observed_mask_train = torch.ones_like(X_train, dtype=torch.bool)
past_observed_mask_val = torch.ones_like(X_val, dtype=torch.bool)
past_observed_mask_test = torch.ones_like(X_test, dtype=torch.bool)

past_values = X_train.unsqueeze(0)
past_time_features = train_time_idx.unsqueeze(0).unsqueeze(1).unsqueeze(-1)
past_observed_mask = past_observed_mask_train.unsqueeze(0).unsqueeze(1)

# 模型前向傳播
output = model(
    past_values=past_values,
    past_time_features=past_time_features,
    past_observed_mask=past_observed_mask
)


predicted_values = output.last_hidden_state

print(predicted_values.shape)  


RuntimeError: The size of tensor a (48) must match the size of tensor b (81504) at non-singleton dimension 2

In [62]:
# 查看數據形狀
print(f"past_values shape: {past_values.shape}")
print(f"past_time_features shape: {past_time_features.shape}")
print(f"past_observed_mask shape: {past_observed_mask.shape}")


past_values shape: torch.Size([1, 81504, 51])
past_time_features shape: torch.Size([1, 1, 81504, 1])
past_observed_mask shape: torch.Size([1, 1, 81504, 51])
