In [None]:
import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from transformers import TimeSeriesTransformerConfig, TimeSeriesTransformerForPrediction


In [None]:
# Load data
# The CSV location can be overridden through the FEATURE_DATA_PATH environment
# variable so the notebook is not tied to one machine's directory layout; when
# the variable is unset the original workspace path is used.
DATA_PATH = os.environ.get(
    "FEATURE_DATA_PATH",
    "/workspace/COMP3610-Renewable-Energy-Prediction/data/processed/feature_engineered_data.csv",
)
data = pd.read_csv(DATA_PATH)

In [None]:
# Prepare data: every numeric column except the target becomes a feature.
target = 'Solar'  # alternative target: 'Wind Onshore'
numeric_cols = (
    data
    .select_dtypes(include=[np.number])
    .columns
    .drop([target])
)
X = data.loc[:, numeric_cols].values
y = data.loc[:, target].values

In [None]:
# Train-test split
# shuffle=False keeps the rows in their original order, so the last 20% of the
# rows form the test set (no shuffle for time series).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)


In [None]:
# Scaling
# The scaler statistics are learned from the training rows only and then
# applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Create sequences
def create_sequences(data, targets, seq_length):
    """Build sliding-window samples for one-step-ahead prediction.

    Window ``i`` is ``data[i:i + seq_length]`` and its label is
    ``targets[i + seq_length]``, i.e. the target that immediately follows
    the window.

    Args:
        data: Array-like of feature rows, indexed along its first axis.
        targets: Array-like of target values aligned row-for-row with ``data``.
        seq_length: Number of consecutive rows per window (non-negative).

    Returns:
        A tuple ``(sequences, target_seq)`` of NumPy arrays, each with
        ``len(data) - seq_length`` entries. When ``data`` has too few rows to
        form a single window, ``sequences`` is empty but keeps the shape
        ``(0, seq_length, *data.shape[1:])`` so that shape lookups such as
        ``sequences.shape[2]`` still work.

    Raises:
        ValueError: If ``seq_length`` is negative.
        IndexError: If ``targets`` has fewer rows than ``data``.
    """
    if seq_length < 0:
        raise ValueError(f"seq_length must be non-negative, got {seq_length}")

    data = np.asarray(data)
    targets = np.asarray(targets)
    n_windows = len(data) - seq_length

    if n_windows <= 0:
        # Not enough rows for even one window: return correctly shaped empties.
        empty_sequences = np.empty((0, seq_length) + data.shape[1:], dtype=data.dtype)
        empty_targets = np.empty((0,) + targets.shape[1:], dtype=targets.dtype)
        return empty_sequences, empty_targets

    sequences = np.stack([data[start:start + seq_length] for start in range(n_windows)])
    # Integer-array indexing (unlike a slice) raises IndexError when targets is
    # shorter than data instead of silently truncating the labels.
    target_seq = targets[np.arange(seq_length, seq_length + n_windows)]
    return sequences, target_seq

In [None]:
# Window both splits with the same lookback length.
seq_length = 24  # 24 hours lookback
X_train_seq, y_train_seq = create_sequences(
    data=X_train, targets=y_train, seq_length=seq_length
)
X_test_seq, y_test_seq = create_sequences(
    data=X_test, targets=y_test, seq_length=seq_length
)

In [None]:
# Convert to PyTorch tensors
# Both arrays are copied into float32 tensors and paired sample-by-sample.
train_data = torch.utils.data.TensorDataset(
    torch.tensor(X_train_seq, dtype=torch.float32),
    torch.tensor(y_train_seq, dtype=torch.float32),
)
# Mini-batches of 32 windows, reshuffled on every pass over the data.
train_loader = DataLoader(dataset=train_data, batch_size=32, shuffle=True)


In [None]:
# Transformer config
# The hyperparameters are passed as keyword arguments so they actually reach
# the config object that the model is built from.
# NOTE(review): per the Hugging Face docs, `input_size` is the dimensionality
# of the target series and `num_time_features` is the number of time-feature
# covariates — confirm that setting both to the feature count is intended, and
# whether `context_length` should be set to match the 24-step lookback.
config = TimeSeriesTransformerConfig(
    prediction_length=1,
    input_size=X_train_seq.shape[2],
    decoder_attention_heads=4,
    encoder_attention_heads=4,
    num_time_features=len(numeric_cols),
)
model = TimeSeriesTransformerForPrediction(config)

In [None]:
# Training setup: Adam over all model parameters, mean-squared-error objective.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
criterion = nn.MSELoss(reduction="mean")


In [None]:
for epoch in range(10):  # Reduced for demo
    # Switch back to training mode on every epoch: the evaluation cell puts the
    # model in eval mode, and that state persists in the kernel if this cell is
    # re-run afterwards.
    model.train()
    running_loss = 0.0
    samples_seen = 0
    for batch_x, batch_y in train_loader:
        optimizer.zero_grad()
        # NOTE(review): the Hugging Face docs describe
        # TimeSeriesTransformerForPrediction.forward as taking past_values,
        # past_time_features and past_observed_mask (plus future_* tensors for
        # training) and returning an output object that carries its own loss —
        # confirm this single-tensor call and the external MSE criterion work
        # with the installed transformers version.
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        # Weight each batch by its size so a short final batch does not skew
        # the epoch average.
        batch_size = batch_x.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size
    # Report the mean loss over the whole epoch rather than the last batch
    # only; an empty loader yields NaN instead of an undefined `loss`.
    epoch_loss = running_loss / samples_seen if samples_seen else float("nan")
    print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")

In [None]:
# Evaluation
# Inference mode for the layers, and no gradient tracking for the forward pass.
model.eval()
with torch.no_grad():
    # NOTE(review): the whole test set goes through the model in one call, and
    # the result is assumed to be a plain tensor — confirm against the
    # model's documented forward signature and return type.
    test_preds = model(torch.tensor(X_test_seq, dtype=torch.float32)).numpy()

In [None]:
# Root-mean-squared error between the held-out targets and the predictions.
# `mean_squared_error` comes from sklearn.metrics (imported in the first cell).
rmse = np.sqrt(mean_squared_error(y_test_seq, test_preds))
print(f"Transformer RMSE: {rmse:.4f}")