Imports

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import TensorDataset, DataLoader, RandomSampler
from sklearn.preprocessing import LabelEncoder

In [13]:
# Import data
data = pd.read_csv("combined3_102_sorted.csv")
data2 = pd.read_csv("102_with_weather.csv")
# scaler = MinMaxScaler()
# scaled_data = scaler.fit_transform(data[['delay', 'scheduled_time', 'day', 'day_of_year']])

weather = data2["Weather"].values
encoder = LabelEncoder()
unique = encoder.fit_transform(weather).reshape(-1,1)
unique += 1

# Data prep and seperation
data = data[['delay', 'scheduled_time', 'day', 'day_of_year']].values

scaled_data = np.concatenate((data, unique), axis=1)

# Z score normalization
mean = np.mean(scaled_data)
std_dev = np.std(scaled_data)
scaled_data = (scaled_data - mean)/std_dev



In [14]:
# Inverse Z score normalization 
def inverse_Z_Score(array):
    array *= std_dev
    array += mean
    return array

In [15]:

torch.manual_seed(100)

# # create train and test sets
# sizes = int(len(scaled_data) * 0.2)
# test_data = scaled_data[:sizes]
# train_data = scaled_data[sizes:]
train_sample = RandomSampler(data_source=scaled_data, num_samples=50100)
test_sample = RandomSampler(data_source=scaled_data, num_samples=12600)

In [16]:
# Batches
train_batchs = DataLoader(dataset=scaled_data, batch_size=60, shuffle=False, sampler=train_sample)  
test_batchs = DataLoader(dataset=scaled_data, batch_size=60, shuffle=False, sampler =test_sample)
# Sequence creator
def createSequences(data, seq_length):
    x, y = [], []
    for i in range(len(data) - seq_length):
        x_data = data[i:(seq_length+i)]
        y_data = data[seq_length+i][0]
        if len(x_data) < 30:
            for i in range (30 - len(x_data)):
                x_data.append(0)
        x.append(x_data)
        y.append(y_data)
    return torch.stack(x, dim=0), torch.stack(y, dim=0)

Normal LSTM

In [6]:
class LSTM(nn.Module):
    def __init__(self, inputdim, outputdim, layerdim, dropout):
        super(LSTM, self).__init__()
        self.layerdim = layerdim
        self.lstm1 = nn.LSTM(inputdim, 108, layerdim, batch_first=True)
        self.batchnorm = nn.BatchNorm1d(108)
        self.dropout = nn.Dropout(dropout)
        self.lstm2 = nn.LSTM(108, 56, layerdim, batch_first=True)
        self.layers = nn.Sequential(
            nn.Linear(56,32),
            nn.ReLU(),
            nn.Linear(32, outputdim)
        )
    
    def forward(self, x, h1=None, c1=None, h2=None, c2=None):
        if h1 is None or c1 is None or h2 is None or c2 is None:
            h1 = torch.zeros(self.layerdim, x.size(0), 108)
            c1 = torch.zeros(self.layerdim, x.size(0), 108)
            h2 = torch.zeros(self.layerdim, x.size(0), 56)
            c2 = torch.zeros(self.layerdim, x.size(0), 56)
        
        out,(h1, c1) = self.lstm1(x, (h1,c1))

        # Batch Normalization
        batch_size, seq_len, hidd_size = out.shape
        out = out.reshape(batch_size * seq_len, hidd_size)
        out = self.batchnorm(out)
        out = out.reshape(batch_size, seq_len, hidd_size)

        # Dropout between layers
        out = self.dropout(out)
        # Second LSTM
        out, (h2, c2) = self.lstm2(out, (h2, c2))
        # Dense layers 
        out = self.layers(out)
        out = out[:, -1, :]
        return out, h1, c1, h2, c2

BiLSTM

In [7]:
class BiLSTM(nn.Module):
    def __init__(self, inputdim, outputdim, layerdim, dropout):
        super(BiLSTM, self).__init__()
        self.layerdim = layerdim
        self.embedding = nn.Embedding(num_embeddings=25, embedding_dim=1)

        self.lstm1 = nn.LSTM(inputdim, 108, layerdim, batch_first=True, bidirectional=True)
        self.batchnorm = nn.BatchNorm1d(216)
        self.dropout = nn.Dropout(dropout)
        self.lstm2 = nn.LSTM(108 * 2, 56, layerdim, batch_first=True, bidirectional=True)
        self.layers = nn.Sequential(
            nn.Linear(56 * 2,56),
            nn.ReLU(),
            nn.Linear(56, 32),
            nn.ReLU(),
            nn.Linear(32, outputdim)
        )
    
    def forward(self, x, h1=None, c1=None, h2=None, c2=None):
        if h1 is None or c1 is None or h2 is None or c2 is None:
            h1 = torch.zeros(self.layerdim*2, x.size(0), 108)
            c1 = torch.zeros(self.layerdim*2, x.size(0), 108)
            h2 = torch.zeros(self.layerdim*2, x.size(0), 56)
            c2 = torch.zeros(self.layerdim*2, x.size(0), 56)
        
        emb = x[:, :, 4].to(torch.long)

        embed = self.embedding(emb).to(torch.float32)
        x = x[:, :, :4]
        
        x = torch.cat([embed, x], dim=2)
        out,(h1, c1) = self.lstm1(x, (h1,c1))

        # Batch Normilization
        batch_size, seq_len, hidd_size = out.shape
        out = out.reshape(batch_size * seq_len, hidd_size)
        out = self.batchnorm(out)
        out = out.reshape(batch_size, seq_len, hidd_size)

        # Drop out between layers
        out = self.dropout(out)
        # Second LSTM layer
        out, (h2, c2) = self.lstm2(out, (h2, c2))
        out = self.layers(out)
        out = out[:, -1, :56]
        return out, h1, c1, h2, c2

Attention

In [85]:
class AttentionBiLSTM(nn.Module):
    def __init__(self, inputdim, outputdim, numheads, layerdim, dropout):
        super(AttentionBiLSTM, self).__init__()
        self.layerdim = layerdim
        
        self.embedding = nn.Embedding(num_embeddings=27, embedding_dim=1)

        self.lstm1 = nn.LSTM(inputdim, 120, layerdim, batch_first=True, bidirectional=True)
        self.batchnorm = nn.LayerNorm(240)
        self.dropout = nn.Dropout(dropout)
        self.lstm2 = nn.LSTM(240, 60, layerdim, batch_first=True, bidirectional=True)
        self.attention = nn.MultiheadAttention(embed_dim=120, num_heads=numheads, batch_first=True)
        self.layers = nn.Sequential(
            nn.Linear(120,56),
            nn.LayerNorm(56),
            nn.Dropout(dropout),
            nn.ReLU(),
            nn.Linear(56, 32),
            nn.LayerNorm(32),
            nn.Dropout(dropout),
            nn.ReLU(),
            nn.Linear(32, outputdim)
        )
    
    def forward(self, x, h1=None, c1=None, h2=None, c2=None):
        if h1 is None or c1 is None or h2 is None or c2 is None:
            h1 = torch.zeros(self.layerdim*2, x.size(0), 120)
            c1 = torch.zeros(self.layerdim*2, x.size(0), 120)
            h2 = torch.zeros(self.layerdim*2, x.size(0), 60)
            c2 = torch.zeros(self.layerdim*2, x.size(0), 60)
        
        # Embedding
        emb = x[:, :, 4].to(torch.long)
        
        embed = self.embedding(emb).to(torch.float32)
        x = x[:, :, :4]
        
        x = torch.cat([embed, x], dim=2)

        # First LSTM
        out,(h1, c1) = self.lstm1(x, (h1,c1))

        # print("First ", out.mean().item(), "std", out.std().item())
        # Batch Normilization
        batch_size, seq_len, hidd_size = out.shape
        out = out.reshape(batch_size * seq_len, hidd_size)
        out = self.batchnorm(out)
        out = out.reshape(batch_size, seq_len, hidd_size)

        # Drop out between layers
        out = self.dropout(out)

        # Second LSTM layer
        out, (h2, c2) = self.lstm2(out, (h2, c2))
        # print("second ", out.mean().item(), "std", out.std().item())

        # Add attention layer
        out, attn_weights = self.attention(query=out, key=out,value=out)
        
        # print("after attention ", out.mean().item(), "std", out.std().item())

        # last time step output
        out = out[:, -1, :]
        
        # Final dense layers
        out = self.dropout(out)
        out = self.layers(out)
        # print("last ", out.mean().item(), "std", out.std().item())
        
        return out, h1, c1, h2, c2

model

In [86]:
# Model
# model = LSTM(inputdim=4, outputdim=1, layerdim=1, dropout=0.2)  # NON Bidirectional
# model = BiLSTM(inputdim=5, outputdim=1, layerdim=1, dropout=0.2)  # Bi Directional
model = AttentionBiLSTM(inputdim=5, outputdim=1,numheads=4, layerdim=1, dropout=0.4) # Bi Directional with Attention
loss_fcn = nn.L1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)


In [None]:
# Training
# print(len(train_batchs))
h1, c1, h2, c2 = None, None, None, None
epochs = 3
model.train()
for epoch in range(epochs):
    epoch_loss = 0

    for batch in train_batchs:
        # Create sequences
        X_train, y_train = createSequences(batch, 30)
        y_train = y_train.reshape(-1,1)
        X_train = X_train.float()
        # print(X_train.shape, y_train.shape)
        
        # Train
        pred, h1, c1, h2, c2 = model(X_train, h1, c1, h2, c2)
        # print(pred)

        loss = loss_fcn(pred, y_train)
        optimizer.zero_grad()
        loss.backward(retain_graph=True)  
        optimizer.step()

        epoch_loss += loss.item()

        h1 = h1.detach()
        c1 = c1.detach()
        h2 = h2.detach()
        c2 = c2.detach()
    
    print(f"Epoch {epoch+1}/{epochs}, loss {epoch_loss:.5f}")


Epoch 1/3, loss 87.18633


In [81]:
y_test_list, y_pred_list = [], []
h1, c1, h2, c2 = None, None, None, None
model.eval()
with torch.no_grad():
    for batch in test_batchs:
        X_test, y_test= createSequences(batch, 30)
        y_test = y_test.reshape(-1,1)
        X_test = X_test.float()

        # print(X_test.shape)

        y_pred, h1, c1, h2, c2 = model(X_test, h1, c1, h2, c2)
        
        y_pred_list.append(y_pred)
        y_test_list.append(y_test)

y_pred_list = np.array(y_pred_list).flatten()
y_test_list = np.array(y_test_list).flatten()

y_pred_list = inverse_Z_Score(y_pred_list)
y_test_list = inverse_Z_Score(y_test_list)

data_verify = pd.DataFrame(y_test_list.tolist(), columns=["Test"])
data_predicted = pd.DataFrame(y_pred_list.tolist(),columns=['Predictions'])

final_output = pd.concat([data_verify, data_predicted], axis=1)
final_output['difference'] = final_output['Test'] - final_output['Predictions']
print(final_output.head())
print(final_output['Predictions'])

    Test  Predictions  difference
0  -16.0   250.474609 -266.474609
1   62.0   254.425781 -192.425781
2  208.0   252.135742  -44.135742
3   36.0   255.016602 -219.016602
4  649.0   252.337891  396.662109
0       250.474609
1       254.425781
2       252.135742
3       255.016602
4       252.337891
           ...    
6295    248.999023
6296    249.666992
6297    249.233398
6298    251.889648
6299    250.345703
Name: Predictions, Length: 6300, dtype: float64
