# Replication

In [1]:
import sys
sys.path.append("mypath")


import numpy as np
import pandas as pd
import math
import os, datetime
import tensorflow as tf
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.metrics import mean_squared_error,mean_absolute_error
import matplotlib.pyplot as plt
import time
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import pywt
import tensorflow
from numpy.random import seed
# plt.style.use('seaborn')
plt.style.use('seaborn-v0_8')

from tqdm.auto import tqdm


In [2]:
class Time2Vector(Layer):
    ''' https://arxiv.org/abs/1907.05321'''
    def __init__(self, seq_len, **kwargs):
        super(Time2Vector, self).__init__()
        self.seq_len = seq_len

    def build(self, input_shape):
        self.weights_linear = self.add_weight(name='weight_linear',
                                              shape=(int(self.seq_len),),
                                              initializer='uniform',
                                              trainable=True)

        self.bias_linear = self.add_weight(name='bias_linear',
                                           shape=(int(self.seq_len),),
                                           initializer='uniform',
                                           trainable=True)

        self.weights_periodic = self.add_weight(name='weight_periodic',
                                                shape=(int(self.seq_len),),
                                                initializer='uniform',
                                                trainable=True)

        self.bias_periodic = self.add_weight(name='bias_periodic',
                                             shape=(int(self.seq_len),),
                                             initializer='uniform',
                                             trainable=True)

    def call(self, x):

        x = tf.math.reduce_mean(x[:, :, :4], axis=-1)
        time_linear = self.weights_linear * x + self.bias_linear
        time_linear = tf.expand_dims(time_linear, axis=-1)

        time_periodic = tf.math.sin(tf.multiply(x, self.weights_periodic) + self.bias_periodic)
        time_periodic = tf.expand_dims(time_periodic, axis=-1)
        return tf.concat([time_linear, time_periodic], axis=-1)

    def get_config(self):
        config = super().get_config().copy()
        config.update({'seq_len': self.seq_len})
        return config


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class T2V(nn.Module):
    def __init__(self, seq_len):
        super(T2V, self).__init__()
        self.seq_len = seq_len

        self.weights_linear = nn.Parameter(torch.rand(seq_len))
        self.bias_linear = nn.Parameter(torch.rand(seq_len))
        self.weights_periodic = nn.Parameter(torch.rand(seq_len))
        self.bias_periodic = nn.Parameter(torch.rand(seq_len))

    def forward(self, x):
        x = torch.mean(x[:, :, :4], dim=-1)
        time_linear = self.weights_linear * x + self.bias_linear
        time_linear = time_linear.unsqueeze(-1)

        time_periodic = torch.sin(x * self.weights_periodic + self.bias_periodic)
        time_periodic = time_periodic.unsqueeze(-1)

        return torch.cat([time_linear, time_periodic], dim=-1)

    def get_config(self):
        config = {'seq_len': self.seq_len}
        return config
    

In [4]:
# Function to generate sample data
def generate_sample_data(batch_size, seq_len):
    # Generate random data with shape (batch_size, seq_len, 4)
    data = np.random.rand(batch_size, seq_len, 4).astype(np.float32)
    return data

# Define sequence length and batch size
seq_len = 10
batch_size = 2
    
# Generate sample data
sample_data = generate_sample_data(batch_size, seq_len)


# Create Time2Vector model
time2vector_model = Time2Vector(seq_len)
print(sample_data)
output = time2vector_model(sample_data)
print(output)

import torch

input_tensor = torch.from_numpy(sample_data)

# Instantiate the Time2Vector model
time2vector_model = T2V(seq_len)

# Forward pass
output = time2vector_model(input_tensor)

# Print the output shape
print("Input shape:", input_tensor)
print("Output shape:", output)

[[[0.31479704 0.275026   0.86952376 0.93648964]
  [0.88384736 0.18213204 0.16908891 0.3862969 ]
  [0.13467349 0.7226308  0.468991   0.5462585 ]
  [0.25810644 0.06038688 0.75916886 0.94908124]
  [0.8011144  0.9174088  0.25338295 0.73676723]
  [0.6857363  0.08121272 0.7529102  0.47461447]
  [0.14907882 0.0590271  0.02894877 0.9891651 ]
  [0.74461067 0.40470362 0.2773737  0.22546142]
  [0.41808578 0.35657334 0.16621795 0.05603991]
  [0.2046366  0.19749862 0.538784   0.48676988]]

 [[0.47319606 0.6364359  0.46419024 0.10070832]
  [0.77985954 0.6885937  0.8898728  0.18206063]
  [0.42904365 0.6895054  0.9198345  0.563221  ]
  [0.33910677 0.65058756 0.00705858 0.8889488 ]
  [0.8254846  0.78716743 0.7862219  0.36993417]
  [0.5574389  0.07639623 0.8385944  0.28182852]
  [0.27403897 0.4681438  0.96496814 0.2552667 ]
  [0.30583695 0.46209434 0.08097155 0.88746846]
  [0.22060531 0.33043098 0.86574054 0.935919  ]
  [0.64233595 0.00880633 0.47539952 0.09682868]]]
tf.Tensor(
[[[-0.01499888  0.0158650

# Using new Transformer Model

In [5]:
import sys
sys.path.append("mypath")


import numpy as np
import pandas as pd
import math
import os, datetime
import tensorflow as tf
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.metrics import mean_squared_error,mean_absolute_error
import matplotlib.pyplot as plt
import time
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import pywt
import tensorflow
from numpy.random import seed
# plt.style.use('seaborn')
plt.style.use('seaborn-v0_8')

from tqdm.auto import tqdm


In [6]:
dtype = {
    'Year': 'Int64',
    'Month': 'Int64',
    'Day': 'Int64',
    'Hour': 'Int64',
    'Minute': 'Int64',
    'Global_active_power':'float32',
    'Global_reactive_power':'float32',
    'Voltage;Global_intensity':'float32',
    'Sub_metering_1':'float32',
    'Sub_metering_2':'float32',
    'Sub_metering_3':'float32'
}

def save_result(y_test,predicted_values):
    np.savetxt('./T_SWT_house4_min5_test.csv',y_test) # save path
    np.savetxt('./T_SWT_house4_min5_predicted.csv',predicted_values) # save path

df=pd.read_csv('C:/Users/Andrew/Desktop/experiments/SWT-Transformer/data ukdale/house1_5mins.csv', dtype=dtype)# path to data

def data_preparation(dataset, window, lev):
    da = []
    for i in range(len(dataset)-window):
        coeffs = pywt.swt(dataset[i:window+i], wavelet='db2', level=lev)
        da.append(coeffs);
    return da

def data_reconstruction(dataset,window):
    da = []
    for i in tqdm(range(len(dataset)), total= len(dataset), desc="iswt"):
#         recon = pywt.iswt(dataset[i,:,:,:].tolist(), 'db2')
        recon = pywt.iswt(dataset[i], 'db2')
#         print(np.array(recon).shape)
        da.append(recon[window-1])
#         da.append(recon[0][window-1])
    return da


# Called because iswt cannot accept tolist() dataset
def data_organization(coeffs):
    '''
    Reshape data back to (n,3,2,window_length), where there are 3 tuples of 2 values consisting of 
    coeffs array_like Coefficients list of tuples:
    [(cAn, cDn), ..., (cA2, cD2), (cA1, cD1)]
    '''
    reshape_list = []
    for i in range(len(coeffs)):
        reshape_list.append([])
        for j in range(len(coeffs[0])):
            reshape_list[i].append(tuple(coeffs[i][j]))
            
    return reshape_list

def create_dataset(dataset, look_back):
    dataX,dataY=[],[]

    for i in range(len(dataset)-look_back):
        a=dataset[i:(i+look_back),0:4]
        dataX.append(a)
        dataY.append(dataset[i+look_back,0:4])
    return np.array(dataX),np.array(dataY)

In [7]:
import torch
import torch.nn as nn

class MultiHeadAttention(nn.Module):
    def __init__(self, d_model, nhead):
        super(MultiHeadAttention, self).__init__()
        self.attention = nn.MultiheadAttention(d_model, nhead)

    def forward(self, x, mask=None):
        return self.attention(x, x, x, attn_mask=mask)[0]

class SingleHeadAttention(nn.Module):
    def __init__(self, d_model):
        super(SingleHeadAttention, self).__init__()
        self.attention = nn.Linear(d_model, d_model)

    def forward(self, query, key, value, mask=None):
        scores = torch.matmul(query, key.transpose(-2, -1))
        if mask is not None:
            scores = scores.masked_fill(mask == 0, float('-inf'))
        weights = torch.nn.functional.softmax(scores, dim=-1)
        output = torch.matmul(weights, value)
        return output

class PositionwiseFeedforward(nn.Module):
    def __init__(self, d_model, d_ff, dropout=0.1):
        super(PositionwiseFeedforward, self).__init__()
        self.linear1 = nn.Linear(d_model, d_ff)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(d_ff, d_model)

    def forward(self, x):
        x = torch.nn.functional.relu(self.linear1(x))
        x = self.dropout(x)
        x = self.linear2(x)
        return x

class TransformerEncoderLayer(nn.Module):
    def __init__(self, d_model, nhead, d_ff, dropout=0.1):
        super(TransformerEncoderLayer, self).__init__()
        self.self_attn = MultiHeadAttention(d_model, nhead)
        self.feedforward = PositionwiseFeedforward(d_model, d_ff, dropout)

    def forward(self, x, mask=None):
        x = x + self.self_attn(x, mask)
        x = x + self.feedforward(x)
        return x

class TransformerDecoderLayer(nn.Module):
    def __init__(self, d_model, nhead, d_ff, dropout=0.1):
        super(TransformerDecoderLayer, self).__init__()
        self.self_attn = MultiHeadAttention(d_model, nhead)
        self.multihead_attn = SingleHeadAttention(d_model)
        self.feedforward = PositionwiseFeedforward(d_model, d_ff, dropout)

    def forward(self, x, memory, src_mask=None, tgt_mask=None):
        x = x + self.self_attn(x, tgt_mask)
        x = x + self.multihead_attn(x, memory, memory, src_mask)
        x = x + self.feedforward(x)
        return x

class TransformerEncoder(nn.Module):
    def __init__(self, num_layers, d_model, nhead, d_ff, dropout=0.1):
        super(TransformerEncoder, self).__init__()
        self.layers = nn.ModuleList([TransformerEncoderLayer(d_model, nhead, d_ff, dropout) for _ in range(num_layers)])

    def forward(self, x, mask=None):
        for layer in self.layers:
            x = layer(x, mask)
        return x

class TransformerDecoder(nn.Module):
    def __init__(self, num_layers, d_model, nhead, d_ff, dropout=0.1):
        super(TransformerDecoder, self).__init__()
        self.layers = nn.ModuleList([TransformerDecoderLayer(d_model, nhead, d_ff, dropout) for _ in range(num_layers)])

    def forward(self, x, memory, src_mask=None, tgt_mask=None):
        for layer in self.layers:
            x = layer(x, memory, src_mask, tgt_mask)
        return x

class Transformer(nn.Module):
    def __init__(self, num_encoder_layers, num_decoder_layers, d_model, nhead, d_ff, input_size, output_size, dropout=0.1):
        super(Transformer, self).__init__()

        self.embedding = nn.Linear(input_size, d_model)
        self.encoder = TransformerEncoder(num_encoder_layers, d_model, nhead, d_ff, dropout)
        self.decoder = TransformerDecoder(num_decoder_layers, d_model, nhead, d_ff, dropout)
        self.fc = nn.Linear(d_model, output_size)

    def forward(self, src, tgt, src_mask=None, tgt_mask=None):
        src = self.embedding(src)
        tgt = self.embedding(tgt)

        memory = self.encoder(src, src_mask)
        output = self.decoder(tgt, memory, src_mask, tgt_mask)
        output = self.fc(output)
        return output



In [8]:
# Example usage
num_encoder_layers = 3
num_decoder_layers = 2
d_model = 256
nhead = 8
d_ff = 256
input_size = 10  # Input features for each time step
output_size = 1  # Output size for time series forecasting
dropout = 0.1

batch_size = 32
seq_len = 1

# # Create an instance of the Transformer model
# model = Transformer(num_encoder_layers, num_decoder_layers, d_model, nhead, d_ff, input_size, output_size, dropout)

# # Generate a sample input tensor
# sample_input_src = torch.rand(batch_size, seq_len, input_size)
# sample_input_tgt = torch.rand(batch_size, seq_len, input_size)

# # Forward pass
# output = model(sample_input_src, sample_input_tgt)

# # Print the output shape
# print("Input shape (source):", sample_input_src.shape)
# print("Input shape (target):", sample_input_tgt.shape)
# print("Output shape:", output.shape)

In [9]:
lev=3
inp_len=2*lev
out_len=2*lev
window=200
look_back = 12

# print(df.head())
# print(df.dtypes)
dataset = df['Volt-Ampere'].values
dataset=dataset.astype('float32')

# print("Dataset Shape:", dataset.shape)
# print("Dataset Length:", len(dataset))

# s = dataset[:12000*3]
s = dataset[:12000*3]
# s = np.squeeze(dataset[:12000*3], axis=1)  #

# Get the maximum decomposition level
max_level = pywt.swt_max_level(len(s))
print("Maximum decomposition level:", max_level)

print(s.shape)

scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
print(type(s))
da=data_preparation(s, window, lev)
# print(da[0][0])

Vv = np.array(da)
print(Vv.shape)
# print(Vv[0][0])

vv = Vv.reshape(Vv.shape[0],2*lev*Vv.shape[3])
print(vv.shape)


dataset = scaler.fit_transform(vv)

dat = dataset.reshape(Vv.shape[0],2*lev,Vv.shape[3])
print(dat.shape)

Maximum decomposition level: 5
(36000,)
<class 'numpy.ndarray'>
(35800, 3, 2, 200)
(35800, 1200)
(35800, 6, 200)


In [10]:
print(dat.shape)
alpha=0.6667
trainX,trainY=dat[:int(dat.shape[0]*alpha),:,window-seq_len-1:window-1],dat[:int(dat.shape[0]*alpha),:,window-1]
testX,testY=dat[int(dat.shape[0]*alpha):,:,window-seq_len-1:window-1],dat[int(dat.shape[0]*alpha):,:,window-1]
testX_a, testY_a = dat[:,:,window-seq_len-1:window-1],dat[:,:,window-1]

print(trainX.shape)
print(trainY.shape)
print(testX.shape)
print(testX_a.shape)

# testX_a=np.transpose(testX_a, (0, 2, 1))
# trainX=np.transpose(trainX, (0, 2, 1))
# testX =np.transpose(testX, (0, 2, 1))

print(testX_a.shape)
print(trainX.shape)
print(testX.shape)

(35800, 6, 200)
(23867, 6, 1)
(23867, 6)
(11933, 6, 1)
(35800, 6, 1)
(35800, 6, 1)
(23867, 6, 1)
(11933, 6, 1)


In [11]:
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

learning_rate = 0.001
num_epochs = 10

# Training data
X_train, y_train = torch.tensor(trainX),torch.tensor(trainY)
###############################################################################
# Validation data
X_val, y_val = testX,testY
###############################################################################
# Test data
X_test, y_test = testX_a,testY_a

# Initialize the model, loss function, and optimizer
model = Transformer(num_encoder_layers, num_decoder_layers, d_model, nhead, d_ff, input_size, output_size, dropout)
criterion = nn.MSELoss()
optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)


# Convert data to PyTorch DataLoader
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

# Training loop
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    for input_batch, target_batch in train_loader:
        optimizer.zero_grad()
        print(input_batch.shape)

        output = model(input_batch, input_batch)
        loss = criterion(output, input_batch)

        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    average_loss = total_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss}")

# Evaluation on the test set
model.eval()
test_loss = 0.0
with torch.no_grad():
    for input_batch, target_batch in zip(test_input, test_target):
        # Autoregressive forecasting: Feed the true values up to the last time step
        input_seq = target_batch[:-1, :].unsqueeze(0)
        target_seq = target_batch[1:, :].unsqueeze(0)

        output = model(input_batch.unsqueeze(0), input_seq)
        loss = criterion(output, target_seq)
        test_loss += loss.item()

average_test_loss = test_loss / len(test_input)
print(f"Average Test Loss: {average_test_loss}")

torch.Size([32, 6, 1])


RuntimeError: mat1 and mat2 shapes cannot be multiplied (192x1 and 10x256)