In [1]:
import pandas as pd
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader
from darts import TimeSeries
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset
from torch.optim.lr_scheduler import StepLR
import torch.optim as optim






In [2]:
class PyTorchWindowGenerator():
    def __init__(self, input_width, label_width, shift,
                 train_df, test_df, label_columns=None):
        self.train_data = torch.tensor(train_df.values, dtype=torch.float32)
        self.test_data = torch.tensor(test_df.values, dtype=torch.float32)

        self.label_columns = label_columns
        if label_columns is not None:
            self.label_columns_indices = {name: i for i, name in enumerate(label_columns)}
        self.column_indices = {name: i for i, name in enumerate(train_df.columns)}

        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift

        self.total_window_size = input_width + shift

        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]

        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

    def split_window(self, features):
        inputs = features[self.input_slice, :]
        labels = features[self.labels_slice, :]
        if self.label_columns is not None:
            labels = torch.stack([labels[:, self.column_indices[name]] for name in self.label_columns], axis=-1)

        return inputs, labels

    def make_dataset(self, data):
        sequence_length = self.total_window_size
        sequences = data.unfold(0, sequence_length, 1).permute(0, 2, 1)
        dataset = [self.split_window(sequence) for sequence in sequences]
        return DataLoader(dataset, shuffle=True)

    def __repr__(self):
        return '\n'.join([
            f'Total window size: {self.total_window_size}',
            f'Input indices: {self.input_indices}',
            f'Label indices: {self.label_indices}',
            f'Label column name(s): {self.label_columns}'])

In [3]:
data = pd.read_csv('../train_group2/merged_data_2.csv', skiprows = 0)

#data['Unnamed: 0'] = pd.to_datetime(data['Unnamed: 0'])
#data.set_index(data.columns[0], inplace=True)
#data.index.name = 'Time'

data = data.rename(columns={'Unnamed: 0': 'datetime'})

data['datetime'] = pd.to_datetime(data['datetime'])


# Drop the first column
data = data.drop(data.columns[1], axis=1)


In [4]:
# Extract day of the week (0 = Monday, 6 = Sunday)
data['weekday'] = data['datetime'].dt.weekday
data['month'] = data['datetime'].dt.month
data['time'] = data['datetime'].dt.time

# Encode 'month' and 'weekday' as categorical variables
data['month'] = data['month'].astype('category')
data['weekday'] = data['weekday'].astype('category')

# Perform one-hot encoding on 'month' and 'weekday'
df_encoded = pd.get_dummies(data, columns=['month', 'weekday', "time"], prefix=['month', 'weekday', 'time'])

# remove datetime
df_encoded = df_encoded.drop('datetime', axis=1)

In [5]:
# Extract features and labels
labels = df_encoded['MWh'].values
features = df_encoded.drop(['MWh'], axis=1).values

In [6]:
# Normalize features using Min-Max scaling
scaler = MinMaxScaler()
features = scaler.fit_transform(features)

In [7]:
# Define sequence length (number of time steps for LSTM)
sequence_length = 7*24  # Use the previous 7 days as input, 24 measurements
# Create sequences and labels for training
X, y = [], []
for i in range(len(features) - sequence_length-24):
    X.append(features[i:i+sequence_length])
    #print(features[i:i+sequence_length].shape)
    y.append(labels[i+sequence_length + 13:i+sequence_length+24])
    

X, y = np.array(X), np.array(y)

In [8]:
# Split data into training and testing sets
split_ratio = 0.8  # Adjust as needed
split_index = int(split_ratio * len(X))


X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

In [9]:
# Convert NumPy arrays to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

In [10]:
# Create DataLoader for training and testing
batch_size = 32  # Adjust as needed
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
test_dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

In [11]:
labels.size

26304

In [12]:
# Split the data
#train = data[data.index.year < 2021]
#test = data[data.index.year >= 2021]

In [13]:
"""
train_width = 168
test_width = 24

window = PyTorchWindowGenerator(input_width=train_width, label_width=test_width, shift=0, 
                                train_df=train, test_df=test, 
                                label_columns=["MWh"])

train_data = window.make_dataset(window.train_data)
test_data = window.make_dataset(window.test_data)
"""


'\ntrain_width = 168\ntest_width = 24\n\nwindow = PyTorchWindowGenerator(input_width=train_width, label_width=test_width, shift=0, \n                                train_df=train, test_df=test, \n                                label_columns=["MWh"])\n\ntrain_data = window.make_dataset(window.train_data)\ntest_data = window.make_dataset(window.test_data)\n'

In [14]:
"""
for inputs, labels in train_data:
    print("Inputs:", inputs)
    print("Labels:", labels)
    break  # Stop after one batch
"""

'\nfor inputs, labels in train_data:\n    print("Inputs:", inputs)\n    print("Labels:", labels)\n    break  # Stop after one batch\n'

In [15]:
"""
for inputs, labels in train_data:
    print("Inputs shape:", inputs.shape)
    print("Labels shape:", labels.shape)
    break  # Stop after one batch
"""

'\nfor inputs, labels in train_data:\n    print("Inputs shape:", inputs.shape)\n    print("Labels shape:", labels.shape)\n    break  # Stop after one batch\n'

In [16]:
import torch.nn as nn

# Define the LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])  # Get the output from the last time step
        return out

class LSTMTimeSeriesModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTMTimeSeriesModel, self).__init__()
        
        # Hidden dimensions
        self.hidden_dim = hidden_dim

        # Number of hidden layers
        self.num_layers = num_layers

        # LSTM layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)

        # Fully connected layer
        self.linear = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(device).requires_grad_()
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(device).requires_grad_()

        # We need to detach as we are doing truncated backpropagation through time (BPTT)
        # If we don't, we'll backprop all the way to the start even after going through another batch 
        #print("x", x)
        #print("h0", h0)
        #print("c0", c0)
        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))
        #print("out",out)
        out = self.linear(out[:, -1, :])
        out = out.unsqueeze(-1)  # Add an extra dimension to match [batch_size, sequence_length, features]

        
        return out




In [17]:
# Define your model with appropriate dimensions
input_dim = X_train.shape[2]  # number of features
hidden_dim = 64  # can be adjusted as needed
num_layers = 40  # number of LSTM layers
output_dim = 24  # number of timesteps to predict


input_size = X_train.shape[2]  # Number of features
hidden_size = 132  # Adjust as needed
num_layers = 20  # Adjust as needed
output_size = 1  # Adjust for regression tasks

In [18]:
model = LSTMModel(input_size, hidden_size, num_layers, output_size)

# Define loss function and optimizer
criterion = nn.MSELoss()  # Mean Squared Error loss for regression
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adjust learning rate as needed

scheduler = StepLR(optimizer, step_size=10, gamma=0.9)


# Set device for training
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Move the model to the GPU
model.to(device)

# Move data to GPU
"""
X_train = X_train.to(device)
y_train = y_train.to(device)
X_test = X_test.to(device)
y_test = y_test.to(device)
"""

'\nX_train = X_train.to(device)\ny_train = y_train.to(device)\nX_test = X_test.to(device)\ny_test = y_test.to(device)\n'

In [19]:
#model.to(device)

# Loss and optimizer
criterion = nn.MSELoss()  # We'll use Mean Squared Error (MSE) as the loss function
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Training parameters
num_epochs = 50
train_losses = []
model.train()
for epoch in range(num_epochs):
    print("epoch starts:", epoch)
    
    running_loss = 0.0
    for inputs, labels in train_loader:
        # Transfer Data to GPU if available
        #inputs, labels = inputs.to(device), labels.to(device)
        
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        
        # Reshape outputs to match criterion's expected shape
        outputs = outputs.unsqueeze(1)

        # Compute loss
        loss = criterion(outputs, labels)
        #print("outputs", outputs)
        #print("label", labels)
        
        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    epoch_loss = running_loss / len(train_data)
    train_losses.append(epoch_loss)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")

# Plot the training loss
import matplotlib.pyplot as plt

plt.figure(figsize=(12,6))
plt.plot(train_losses, label="Training loss")
plt.xlabel("Epochs")
plt.ylabel("Loss (MSE)")
plt.title("Training Loss over Epochs")
plt.legend()
plt.grid(True)
plt.show()

epoch starts: 0


NotImplementedError: Could not run 'aten::mkldnn_rnn_layer' with arguments from the 'CUDA' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'aten::mkldnn_rnn_layer' is only available for these backends: [CPU, Meta, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradHIP, AutogradXLA, AutogradMPS, AutogradIPU, AutogradXPU, AutogradHPU, AutogradVE, AutogradLazy, AutogradMeta, AutogradMTIA, AutogradPrivateUse1, AutogradPrivateUse2, AutogradPrivateUse3, AutogradNestedTensor, Tracer, AutocastCPU, AutocastCUDA, FuncTorchBatched, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PythonDispatcher].

CPU: registered at aten/src/ATen/RegisterCPU.cpp:31034 [kernel]
Meta: registered at /dev/null:228 [kernel]
BackendSelect: fallthrough registered at ../aten/src/ATen/core/BackendSelectFallbackKernel.cpp:3 [backend fallback]
Python: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:144 [backend fallback]
FuncTorchDynamicLayerBackMode: registered at ../aten/src/ATen/functorch/DynamicLayer.cpp:491 [backend fallback]
Functionalize: registered at ../aten/src/ATen/FunctionalizeFallbackKernel.cpp:280 [backend fallback]
Named: registered at ../aten/src/ATen/core/NamedRegistrations.cpp:7 [backend fallback]
Conjugate: registered at ../aten/src/ATen/ConjugateFallback.cpp:17 [backend fallback]
Negative: registered at ../aten/src/ATen/native/NegateFallback.cpp:19 [backend fallback]
ZeroTensor: registered at ../aten/src/ATen/ZeroTensorFallback.cpp:86 [backend fallback]
ADInplaceOrView: fallthrough registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:63 [backend fallback]
AutogradOther: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:17476 [autograd kernel]
AutogradCPU: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:17476 [autograd kernel]
AutogradCUDA: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:17476 [autograd kernel]
AutogradHIP: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:17476 [autograd kernel]
AutogradXLA: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:17476 [autograd kernel]
AutogradMPS: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:17476 [autograd kernel]
AutogradIPU: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:17476 [autograd kernel]
AutogradXPU: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:17476 [autograd kernel]
AutogradHPU: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:17476 [autograd kernel]
AutogradVE: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:17476 [autograd kernel]
AutogradLazy: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:17476 [autograd kernel]
AutogradMeta: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:17476 [autograd kernel]
AutogradMTIA: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:17476 [autograd kernel]
AutogradPrivateUse1: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:17476 [autograd kernel]
AutogradPrivateUse2: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:17476 [autograd kernel]
AutogradPrivateUse3: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:17476 [autograd kernel]
AutogradNestedTensor: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:17476 [autograd kernel]
Tracer: registered at ../torch/csrc/autograd/generated/TraceType_2.cpp:16726 [kernel]
AutocastCPU: registered at ../aten/src/ATen/autocast_mode.cpp:492 [kernel]
AutocastCUDA: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:354 [backend fallback]
FuncTorchBatched: registered at ../aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:815 [backend fallback]
FuncTorchVmapMode: fallthrough registered at ../aten/src/ATen/functorch/VmapModeRegistrations.cpp:28 [backend fallback]
Batched: registered at ../aten/src/ATen/LegacyBatchingRegistrations.cpp:1073 [backend fallback]
VmapMode: fallthrough registered at ../aten/src/ATen/VmapModeRegistrations.cpp:33 [backend fallback]
FuncTorchGradWrapper: registered at ../aten/src/ATen/functorch/TensorWrapper.cpp:210 [backend fallback]
PythonTLSSnapshot: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:152 [backend fallback]
FuncTorchDynamicLayerFrontMode: registered at ../aten/src/ATen/functorch/DynamicLayer.cpp:487 [backend fallback]
PythonDispatcher: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:148 [backend fallback]
