In [1]:
import torch
from torch import nn
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from math import sqrt

In [2]:
df=pd.read_csv("V_228.csv",header=None)

In [3]:
df.shape

(12672, 228)

In [4]:
data=df.iloc[:,:5].values

In [5]:
data.shape

(12672, 5)

In [6]:
def train_test_split(data):
   test_size=1440
   validation_size=1440
   train_data=data[:-test_size - validation_size]

   validation_data=data[-test_size - validation_size:-test_size]
   test_data=data[-test_size:]
   return train_data,validation_data,test_data

In [7]:
def create_train_sequences(data,window_size,forecast_horizon):
  X=[]
  y=[]
  for i in range(len(data)-window_size- forecast_horizon + 1):
        _x = data[i:(i+window_size)]
        #_y = data[i+window_size]
        _y=data[i + window_size:i + window_size + forecast_horizon]
        X.append(_x)
        y.append(_y)
  return X,y

In [8]:
def create_test_sequences(data, window_size, forecast_horizon):
    X = []
    y = []
    step_size = window_size + forecast_horizon

    for i in range(0, len(data) - window_size - forecast_horizon + 1, step_size):
        _x = data[i:(i + window_size)]
        _y = data[i + window_size:i + window_size + forecast_horizon]
        X.append(_x)
        y.append(_y)

    return X, y

In [9]:
sc=MinMaxScaler()
df_scaled=sc.fit_transform(data)

In [10]:
df_scaled.shape

(12672, 5)

In [11]:
sequence_length=12
horizon=9
batch_size=32
input_size=1
hidden_size=64
output_size=1
ff_hiddensize=64
mask_flag=None
attn_head=8
start_dec_token_len=6

In [12]:

train_seq_x=[]
train_seq_y=[]
valid_seq_x=[]
valid_seq_y=[]
test_seq_x=[]
test_seq_y=[]
for i in range(len(df_scaled[1])):
  train_data,validation_data,test_data = train_test_split(df_scaled[:,i])
  train_x,train_y=create_train_sequences(train_data,sequence_length,horizon)
  train_seq_x.append(train_x)
  train_seq_y.append(train_y)

  valid_x,valid_y=create_train_sequences(validation_data,sequence_length,horizon)
  valid_seq_x.append(valid_x)
  valid_seq_y.append(valid_y)

  test_x,test_y=create_test_sequences(test_data,sequence_length,horizon)
  test_seq_x.append(test_x)
  test_seq_y.append(test_y)


In [13]:
X_train=torch.tensor(train_seq_x,dtype=torch.float32)
y_train=torch.tensor(train_seq_y,dtype=torch.float32)
X_valid=torch.tensor(valid_seq_x,dtype=torch.float32)
y_valid=torch.tensor(valid_seq_y,dtype=torch.float32)
X_test=torch.tensor(test_seq_x,dtype=torch.float32)
y_test=torch.tensor(test_seq_y,dtype=torch.float32)

  X_train=torch.tensor(train_seq_x,dtype=torch.float32)


In [14]:
X_train=X_train.unsqueeze(-1)
X_train=X_train.view(-1,X_train.size(2),X_train.size(3))

y_train=y_train.unsqueeze(-1)
y_train=y_train.view(-1,y_train.size(2),y_train.size(3))

X_valid=X_valid.unsqueeze(-1)
X_valid=X_valid.view(-1,X_valid.size(2),X_valid.size(3))

y_valid=y_valid.unsqueeze(-1)
y_valid=y_valid.view(-1,y_valid.size(2),y_valid.size(3))

X_test=X_test.unsqueeze(-1)
y_test=y_test.unsqueeze(-1)
"""
X_test=X_test.unsqueeze(-1)
X_test=X_test.view(-1,X_test.size(2),X_test.size(3))

y_test=y_test.unsqueeze(-1)
y_test=y_test.view(-1,y_test.size(2),y_test.size(3))
"""

'\nX_test=X_test.unsqueeze(-1)\nX_test=X_test.view(-1,X_test.size(2),X_test.size(3))\n\ny_test=y_test.unsqueeze(-1)\ny_test=y_test.view(-1,y_test.size(2),y_test.size(3))\n'

In [15]:
class TimeseriesDataset(Dataset):
  def __init__(self,X,y):
    self.X=X
    self.y=y
  def __len__(self):
    return len(self.X)
  def __getitem__(self,idx):
    return self.X[idx],self.y[idx]

In [16]:
train_dataset=TimeseriesDataset(X_train,y_train)
valid_dataset=TimeseriesDataset(X_valid,y_valid)
test_dataset=TimeseriesDataset(X_test,y_test)

In [17]:
train_loader=DataLoader(train_dataset,batch_size,drop_last=True)
valid_loader=DataLoader(valid_dataset,batch_size,drop_last=True)
test_loader=DataLoader(test_dataset,batch_size,drop_last=True)

In [18]:
class InputEmbedding(nn.Module):
  def __init__(self,input_size,hidden_size):
      super().__init__()
      self.input_size=input_size
      self.hidden_size=hidden_size
      self.conv1d=nn.Conv1d(in_channels=self.input_size,out_channels=self.hidden_size,padding=1,kernel_size=3,bias=False)
  def forward(self,x):
      embedded_inp=self.conv1d(x.permute(0,2,1))
      return embedded_inp.transpose(1,2)


In [19]:
class PositionalEmbedding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model).float()
        pe.require_grad = False
        position = torch.arange(0, max_len).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float()* -(math.log(10000.0) / d_model)).exp()
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)
    def forward(self,x):
        return self.pe[:,x.size(1)]

In [20]:
class AttentionLayer(nn.Module):
  def __init__(self,attn_head,hidden_size):
    super().__init__()

    self.attn_head=attn_head
    self.hidden_size=hidden_size
    self.dropout = nn.Dropout(0.1)
    self.linear=nn.Linear(hidden_size,hidden_size)

    self.queries=nn.Linear(hidden_size,hidden_size)
    self.keys=nn.Linear(hidden_size,hidden_size)
    self.values=nn.Linear(hidden_size,hidden_size)

  def forward(self, queries,keys,values,attention_mask):
    b,l,d=queries.shape
    b,s,d=keys.shape

    #Linear projection and creation of multiple heads
    queries=self.queries(queries).view(b,l,self.attn_head,-1)
    keys=self.keys(keys).view(b,s,self.attn_head,-1)
    values=self.values(values).view(b,s,self.attn_head,-1)

    b,l,h,d=queries.shape
    b,s,h,d=keys.shape

    #Calculate attention score
    attention_score=torch.einsum("blhd,bshd->bhls",queries,keys)
    if attention_mask == True:
       mask_shape = [b,h,l,s]
       mask=torch.triu(torch.ones(mask_shape,dtype=torch.bool),diagonal=1)
       attention_score.masked_fill_(mask,-np.inf)
    attention_score_softmax=self.dropout(torch.softmax(attention_score/sqrt(d),dim=-1))
    final_value=torch.einsum("bhls,bshd->blhd",attention_score_softmax,values)

    weighted_attn_val=self.linear(final_value.reshape(b,l,-1))

    return weighted_attn_val

In [21]:
class Encoder(nn.Module):
  def __init__(self,attention,hidden_size,output_size,ff_hiddensize,sequence_length):
    super().__init__()
    self.attention=attention
    self.conv1=nn.Conv1d(in_channels=hidden_size,out_channels=ff_hiddensize,kernel_size=1)
    self.conv2=nn.Conv1d(in_channels=ff_hiddensize,out_channels=hidden_size,kernel_size=1)
    self.linear=nn.Linear(ff_hiddensize,hidden_size)
    self.norm1=nn.LayerNorm(normalized_shape=(sequence_length,hidden_size))
    self.activation=F.relu
  def forward(self,x):
    #self.norm2=nn.LayerNorm(hidden_size)
    attention_x=self.attention(x,x,x,attention_mask=False)
    #add and normalize
    new_x = x + attention_x
    res_x=x=self.norm1(new_x)
    ##Feed forward NN:
    out=self.conv1(res_x.permute(0,2,1))
    out=self.activation(out)
    out=self.conv2(out).transpose(-1, 1)

    ##Add and normalize:
    new_out=out+res_x
    norm_out=self.norm1(new_out)
    return norm_out

In [22]:
class Decoder(nn.Module):
  def __init__(self,attention,hidden_size,output_size,ff_hiddensize,sequence_length):
    super().__init__()
    self.attention=attention
    #print(f"decoder output size: {output_size}")
    self.conv1=nn.Conv1d(in_channels=hidden_size,out_channels=ff_hiddensize,kernel_size=1)
    self.conv2=nn.Conv1d(in_channels=ff_hiddensize,out_channels=hidden_size,kernel_size=1)
    self.linear1=nn.Linear(hidden_size,ff_hiddensize)
    self.linear2=nn.Linear(ff_hiddensize,hidden_size)
    self.linear3=nn.Linear(hidden_size,output_size)

    self.norm1=nn.LayerNorm(hidden_size)
    self.norm2=nn.LayerNorm(hidden_size)
    self.norm3=nn.LayerNorm(hidden_size)
    self.activation=F.relu
  def forward(self,dec_inp,enc_out):
    #calculate self attention by passing dec_inp as Queries,keys and values
    self_attn=self.attention(dec_inp,dec_inp,dec_inp,attention_mask=True)
    #add residual connection and normalize
    residual_add=self_attn+dec_inp
    new_dec_x=self.norm1(residual_add)

    # encoder-decoder attention. Pass key and value as encoder output and queries are output of 1st attention
    enc_dec_atten=self.attention(new_dec_x,enc_out,enc_out,attention_mask=False)
    ## add and normalize
    new_x=enc_dec_atten+self_attn
    norm_x=self.norm2(new_x)

    #FFN
    out=self.conv1(norm_x.permute(0,2,1))
    out=self.activation(out)
    out=self.conv2(out).transpose(-1, 1)

    #add and normalize
    new_x=out+norm_x
    out=self.norm3(new_x)


    #Linear projection
    pred=self.linear3(out)

    return pred


In [23]:
class TransformerModel(nn.Module):
  def __init__(self,input_size,hidden_size,output_size,ff_hiddensize,mask_flag,attn_head,sequence_length):
      super().__init__()
      self.input_size=input_size
      self.hidden_size=hidden_size
      self.output_size=output_size
      self.ff_hidden_size=hidden_size
      self.mask_flag=mask_flag
      self.attn_head=attn_head
      self.sequence_length=sequence_length
      self.embedding=InputEmbedding(self.input_size,self.hidden_size)
      self.positional_embedding=PositionalEmbedding(self.hidden_size)
      self.encoder=Encoder( AttentionLayer(self.attn_head,hidden_size),self.hidden_size,self.output_size,self.ff_hidden_size,sequence_length)
      self.decoder=Decoder( AttentionLayer(self.attn_head,hidden_size),self.hidden_size,self.output_size,self.ff_hidden_size,sequence_length )
  def forward(self,x,y):

      #dec inp:
      #input is shifted right and concatenated with 1st time step embedding set as zero
      #decoder_input = torch.cat([torch.zeros_like(y[:, :1, :]), y[:, :-1, :]], dim=1)
      #print(f"x shape: {x[:, -start_dec_token_len:, :].shape}, y shape: {y[:, -horizon:, :].shape}")
      #print(f"x shape: {x.shape}, y shape: {y.shape}")
      #decoder_input = torch.cat((x[:, -start_dec_token_len:, :], torch.zeros_like(y[:, -horizon:, :])), dim=1)
      #decoder_input = torch.cat((x[:, :,-start_dec_token_len ,:], torch.zeros_like(y[:, :,-horizon:, :])), dim=1)
      #print(f"decoder_input shape: {decoder_input.shape}")

      decoder_input=torch.zeros_like(y[:, -horizon:, :])

      #encoder
      inp_embed=self.embedding(x)
      pos_embed=self.embedding(x)
      x_embed=inp_embed + pos_embed
      enc_out=self.encoder(x_embed)

      #decoder
      inp_embed=self.embedding(decoder_input)
      pos_embed=self.embedding(decoder_input)
      y_embed= inp_embed + pos_embed
      #print(f"y passed to decoder shape: {y.shape}")
      out=self.decoder(y_embed,enc_out)
      return out


In [24]:
model=TransformerModel(input_size,hidden_size,output_size,ff_hiddensize,mask_flag,attn_head,sequence_length)

In [25]:
loss_fun=nn.MSELoss()
optimizer=torch.optim.Adam(model.parameters(),lr=0.01)
epochs=30

In [26]:
for epoch in range(epochs):
        for batch_idx, (X,y) in enumerate(train_loader):

            pred=model(X,y)
            pred=pred[:,-horizon:,:]
            optimizer.zero_grad()
            loss=loss_fun(pred,y)
            loss.backward()
            optimizer.step()
        if epoch%10==0:
            print(f"epoch: {epoch} train loss:{loss} ")

epoch: 0 train loss:0.04169631004333496 
epoch: 10 train loss:0.04237790405750275 
epoch: 20 train loss:0.04239429906010628 


In [30]:
output=[]
ground_truth=[]
pred_series=[]
truth_series=[]
loss=[]
pred_total=[]
y_total=[]

for i in range(X_test.size(0)):
  curent_X_test=X_test[i,:,:,:]
  current_y_test=y_test[i,:,:,:]
  pred=model(curent_X_test,current_y_test)
  pred=pred[:,-horizon:,:]
  pred=pred.reshape(-1,1).detach().numpy()
  current_y_test=current_y_test.reshape(-1,1).detach().numpy()

  loss.append(loss_fun(torch.tensor(pred),torch.tensor(current_y_test)))



In [31]:
loss

[tensor(0.0753),
 tensor(0.0928),
 tensor(0.0408),
 tensor(0.0407),
 tensor(0.0368)]