# This notebook trains a TRANSFORMER model on one building only. dataset1 contains the data of the third building (as a random exemple) of the original dataset

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from memory_profiler import memory_usage
import matplotlib.pyplot as plt
import pandas as pd
import pickle
from torchmetrics import MeanSquaredError
from torch.utils.data import TensorDataset, DataLoader, Dataset
from sklearn.preprocessing import MinMaxScaler
%load_ext memory_profiler

def sMAPE(outputs, targets):
    """
    Symmetric Mean Absolute Percentage Error (sMAPE) for evaluating the model.
    It is the sum of the absolute difference between the predicted and actual values divided by the average of
    the predicted and actual value, therefore giving a percentage measuring the amount of error :
    100/n * sum(|F_t - A_t| / ((|F_t| + |A_t|) / 2)) with t = 1 to n

    :param outputs: predicted values
    :param targets: real values
    :return: sMAPE
    """
    return 100 / len(targets) * torch.sum(
        2 * torch.abs(outputs - targets) / (torch.abs(outputs) + torch.abs(targets))
    )

def create_sequences(data, seq_length):
    """
    Function to preprocess sequential data to make it usable for training neural networks.
    It transforms raw data into input-target pairs

    :param data: the dataframe containing the data or the numpy array containing the data
    :param seq_length: The length of the input sequences. It is the number of consecutive data points used as input to predict the next data point.
    :return: the numpy arrays of the inputs and the targets,
    where the inputs are sequences of consecutive data points and the targets are the immediate next data points.
    """
    if len(data) < seq_length:
        raise ValueError(
            "The length of the data is less than the sequence length")

    xs, ys = [], []
    # Iterate over data indices
    for i in range(len(data) - seq_length):
        if type(data) is pd.DataFrame:
            # Define inputs
            x = data.iloc[i:i + seq_length]

            # Define target
            y = data.iloc[i + seq_length]

        else:
            # Define inputs
            x = data[i:i + seq_length]

            # Define target
            y = data[i + seq_length]

        xs.append(x)
        ys.append(y)

    # Convert lists to numpy arrays
    xs = np.array(xs)
    ys = np.array(ys)

    # Shuffle the sequences
    indices = np.arange(xs.shape[0])
    np.random.shuffle(indices)
    xs = xs[indices]
    ys = ys[indices]

    return xs, ys

class Data(Dataset):
    """
    Pytorch Dataset class for the data
    """
    def __init__(self, x_data, y_data):
        self.x_data = x_data
        self.y_data = y_data

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return len(self.x_data)

file_path='/kaggle/input/dataset1/residential_1004.pkl'
model_choice="GRU"
df = pd.read_pickle(file_path)
df["ID"] = df["ID"].astype("category")
df["time_code"] = df["time_code"].astype("uint16")
df = df.set_index("date_time")

# Electricity consumption per hour (date with hour in the index)
df = df["consumption"].resample("h", label='right', closed='right').sum().to_frame()
df.head(5)

# Define the device
device = "cuda" if torch.cuda.is_available() else "cpu"
lr = 0.001
n_epochs = 50

# Scaling the input data
sc = None  # MinMaxScaler()
label_sc = None  # MinMaxScaler()
window_size = 10  # number of data points used as input to predict the next data point
data = create_sequences(df["2009-07-14":"2010-12-15"].values, window_size)
data_test = create_sequences(df["2010-12-15":"2011-01-01"].values, window_size)

# Use create_sequences to create inputs and targets
train_x, train_y = create_sequences(data if sc else df["2009-07-14":"2010-12-15"], window_size)
print(train_x.shape, train_y.shape)

test_x, test_y = create_sequences(data_test if label_sc else df["2010-12-15":"2011-01-01"], window_size)

# Pytorch data loaders/generators
batch_size = 2048

# Create TensorDataset
train_data = Data(torch.FloatTensor(train_x), torch.FloatTensor(train_y))

# Drop the last incomplete batch
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size, drop_last=True)
del train_x, train_y

# Test data
test_data = Data(torch.FloatTensor(test_x), torch.FloatTensor(test_y))
test_loader = DataLoader(test_data, shuffle=False, batch_size=1)
del test_x, test_y

# Positional Encoding for Transformer
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

# Model definition using Transformer
class TransformerModel(nn.Module):
    def __init__(self, input_dim=1, d_model=64, nhead=8, num_layers=4, dropout=0.2):
        super(TransformerModel, self).__init__()
        self.encoder = nn.Linear(input_dim, d_model)
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        encoder_layers = nn.TransformerEncoderLayer(d_model, nhead)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)
        self.decoder = nn.Linear(d_model, 1)

    def forward(self, x):
        x = self.encoder(x)
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x)
        x = self.decoder(x[:, -1, :])
        return x

model = TransformerModel().to(device)
#model_mem_usage = memory_usage((TransformerModel,), interval=0.1)
#print(f"Memory usage for creating Transformer model: {max(model_mem_usage)} MiB")

def train_function(net, criterion, optimizer, train_loader, n_epochs=5, device=torch.device("cuda")):
    for epoch in range(n_epochs):
        for seqs, labels in train_loader:
            # Move data to device
            seqs, labels = seqs.float().to(device), labels.float().to(device)

            # Get model outputs
            outputs = net(seqs)

            # Compute loss
            loss = criterion(outputs, labels)
            optimizer.zero_grad()  # Reset the graph of gradients
            loss.backward()  # Calculate the graph of gradients
            optimizer.step()  # Update each gradient
        print(f"Epoch {epoch + 1}, Loss: {loss.item()}")
    return net

def test_function(net, dataloader_test, scaler, label_scaler, device=torch.device("cuda")):
    """
    Model evaluation on test data
    :param net:
    :param dataloader_test:
    :return:
    """
    # Define MSE metric
    mse = MeanSquaredError().to(device=device)

    net.eval()  # Tell the net that we are running a validation set (no dropout for example)
    list_outputs = []
    list_targets = []
    with torch.no_grad():  # Do not reserve memory space for gradients
        for seqs, labels in dataloader_test:
            # Move data to device
            seqs, labels = seqs.float().to(device), labels.float().to(device)
            # Pass seqs to net and squeeze the result
            outputs = net(seqs)

            if label_scaler:
                outputs = torch.tensor(scaler.inverse_transform(outputs), device=device)
                labels = torch.tensor(label_scaler.inverse_transform(labels), device=device)

            outputs = outputs.squeeze()
            labels = labels.squeeze()

            # Compute loss
            mse(outputs, labels)
            list_targets.append(labels.detach())  # Detach to remove PyTorch constraints on the values
            list_outputs.append(outputs.detach())
    # Compute final metric value
    test_mse = mse.compute()
    print(f"Test MSE: {test_mse}")

    return torch.tensor(list_outputs, device=device), torch.tensor(list_targets, device=device), test_mse

# Defining loss function and optimizer
criterion = nn.MSELoss()  # Mean Squared Error
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
%reload_ext memory_profiler

# Train the model with Transformer
train_mem_usage = memory_usage((train_function, (model, criterion, optimizer, train_loader, n_epochs, device)), interval=0.1)
print(f"Memory usage for training function: {max(train_mem_usage)} MiB")

net = train_function(model, criterion, optimizer, train_loader, n_epochs, device)

# Evaluate the Transformer model
net.to(device)
list_outputs, list_targets, test_mse = test_function(net, test_loader, sc, label_sc, device)

s_mape = round(sMAPE(list_outputs, list_targets).cpu().item(), 3)
print(f"sMAPE: {s_mape}%")

# Visualizations
plt.plot(list_outputs.to("cpu"), "-o", color="blue", label="TRANSFORMER Predictions", markersize=3)
plt.plot(list_targets.to("cpu"), color="red", label="Actual")
plt.ylabel("Energy Consumption (MW)")
plt.title(f"Energy Consumption for Electricity state")
plt.legend()
plt.show()


(12469, 10, 1) (12469, 1)




Epoch 1, Loss: 2.717456817626953
Epoch 2, Loss: 3.0488369464874268
Epoch 3, Loss: 2.8257153034210205
Epoch 4, Loss: 2.8749947547912598
Epoch 5, Loss: 2.5123119354248047
Epoch 6, Loss: 2.2322583198547363
Epoch 7, Loss: 2.273366928100586
Epoch 8, Loss: 2.529101848602295
Epoch 9, Loss: 1.965575098991394
Epoch 10, Loss: 2.094241142272949
Epoch 11, Loss: 2.1264524459838867
Epoch 12, Loss: 2.0884294509887695
Epoch 13, Loss: 1.8732662200927734
Epoch 14, Loss: 2.072746753692627
Epoch 15, Loss: 2.226358652114868
Epoch 16, Loss: 2.085824966430664
Epoch 17, Loss: 2.1527485847473145
Epoch 18, Loss: 1.9396088123321533
Epoch 19, Loss: 1.9086382389068604
Epoch 20, Loss: 1.8857200145721436
Epoch 21, Loss: 2.048654079437256
Epoch 22, Loss: 1.877053141593933
Epoch 23, Loss: 2.001169204711914
Epoch 24, Loss: 1.9983865022659302
Epoch 25, Loss: 1.834184169769287
Epoch 26, Loss: 2.030308485031128
Epoch 27, Loss: 2.300358772277832
Epoch 28, Loss: 2.070951461791992
Epoch 29, Loss: 2.1619222164154053
Epoch 30,

KeyboardInterrupt: 