Imports

In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import TensorDataset, DataLoader, RandomSampler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [None]:
# # Import data
# data = pd.read_csv("combined3_102_sorted.csv")
# data2 = pd.read_csv("102_with_weather.csv")
# # scaler = MinMaxScaler()
# # scaled_data = scaler.fit_transform(data[['delay', 'scheduled_time', 'day', 'day_of_year']])

# weather = data2["Weather"].values
# encoder = LabelEncoder()
# unique = encoder.fit_transform(weather).reshape(-1,1)
# unique += 1

# # Data prep and seperation
# data = data[['delay', 'scheduled_time', 'day', 'day_of_year']].values
# data = np.concatenate((data, unique), axis=1)

# Obtain data
df = pd.read_csv("filename.csv")
df = df.sample(n=62700, random_state=42)
data = df[['delay','stop_id','scheduled_time','day_of_year','day']]
weather = df[['Weather']]

# Z score normalization
mean = np.mean(data.values)
std_dev = np.std(data.values)
scaled_data = (data - mean)/std_dev

scaled_data = pd.concat((scaled_data, weather), axis=1)
# Split into train and test
train_set, test_set = train_test_split(scaled_data, test_size=0.2, random_state=42)

# Sort so there is some sort of order in the sequences
train_set = train_set.sort_values(by=['day_of_year','scheduled_time'])
test_set = test_set.sort_values(by=['day_of_year','scheduled_time'])



13383.020819776715 26812.26119549741


In [32]:
# Inverse Z score normalization 
def inverse_Z_Score(array):
    array = array *std_dev
    array += mean
    return array

In [15]:

# torch.manual_seed(100)

# # # create train and test sets
# # sizes = int(len(scaled_data) * 0.2)
# # test_data = scaled_data[:sizes]
# # train_data = scaled_data[sizes:]
# train_sample = RandomSampler(data_source=scaled_data, num_samples=50100)
# test_sample = RandomSampler(data_source=scaled_data, num_samples=12600)

In [33]:
# Batches
train_data = torch.tensor(train_set.values, dtype=torch.float32)
test_data = torch.tensor(test_set.values, dtype=torch.float32)

train_batchs = DataLoader(dataset=train_data, batch_size=60, shuffle=False)  
test_batchs = DataLoader(dataset=test_data, batch_size=60, shuffle=False)

# Sequence creator
def createSequences(data, seq_length):
    x, y = [], []
    for i in range(len(data) - seq_length):
        x_data = data[i:(seq_length+i)]
        y_data = data[seq_length+i][0]
        if len(x_data) < 30:
            for i in range (30 - len(x_data)):
                x_data.append(0)
        x.append(x_data)
        y.append(y_data)
    return torch.stack(x, dim=0), torch.stack(y, dim=0)

Normal LSTM

In [6]:
class LSTM(nn.Module):
    def __init__(self, inputdim, outputdim, layerdim, dropout):
        super(LSTM, self).__init__()
        self.layerdim = layerdim
        self.lstm1 = nn.LSTM(inputdim, 108, layerdim, batch_first=True)
        self.batchnorm = nn.BatchNorm1d(108)
        self.dropout = nn.Dropout(dropout)
        self.lstm2 = nn.LSTM(108, 56, layerdim, batch_first=True)
        self.layers = nn.Sequential(
            nn.Linear(56,32),
            nn.ReLU(),
            nn.Linear(32, outputdim)
        )
    
    def forward(self, x, h1=None, c1=None, h2=None, c2=None):
        if h1 is None or c1 is None or h2 is None or c2 is None:
            h1 = torch.zeros(self.layerdim, x.size(0), 108)
            c1 = torch.zeros(self.layerdim, x.size(0), 108)
            h2 = torch.zeros(self.layerdim, x.size(0), 56)
            c2 = torch.zeros(self.layerdim, x.size(0), 56)
        
        out,(h1, c1) = self.lstm1(x, (h1,c1))

        # Batch Normalization
        batch_size, seq_len, hidd_size = out.shape
        out = out.reshape(batch_size * seq_len, hidd_size)
        out = self.batchnorm(out)
        out = out.reshape(batch_size, seq_len, hidd_size)

        # Dropout between layers
        out = self.dropout(out)
        # Second LSTM
        out, (h2, c2) = self.lstm2(out, (h2, c2))
        # Dense layers 
        out = self.layers(out)
        out = out[:, -1, :]
        return out, h1, c1, h2, c2

BiLSTM

In [7]:
class BiLSTM(nn.Module):
    def __init__(self, inputdim, outputdim, layerdim, dropout):
        super(BiLSTM, self).__init__()
        self.layerdim = layerdim
        self.embedding = nn.Embedding(num_embeddings=25, embedding_dim=1)

        self.lstm1 = nn.LSTM(inputdim, 108, layerdim, batch_first=True, bidirectional=True)
        self.batchnorm = nn.BatchNorm1d(216)
        self.dropout = nn.Dropout(dropout)
        self.lstm2 = nn.LSTM(108 * 2, 56, layerdim, batch_first=True, bidirectional=True)
        self.layers = nn.Sequential(
            nn.Linear(56 * 2,56),
            nn.ReLU(),
            nn.Linear(56, 32),
            nn.ReLU(),
            nn.Linear(32, outputdim)
        )
    
    def forward(self, x, h1=None, c1=None, h2=None, c2=None):
        if h1 is None or c1 is None or h2 is None or c2 is None:
            h1 = torch.zeros(self.layerdim*2, x.size(0), 108)
            c1 = torch.zeros(self.layerdim*2, x.size(0), 108)
            h2 = torch.zeros(self.layerdim*2, x.size(0), 56)
            c2 = torch.zeros(self.layerdim*2, x.size(0), 56)
        
        emb = x[:, :, 4].to(torch.long)

        embed = self.embedding(emb).to(torch.float32)
        x = x[:, :, :4]
        
        x = torch.cat([embed, x], dim=2)
        out,(h1, c1) = self.lstm1(x, (h1,c1))

        # Batch Normilization
        batch_size, seq_len, hidd_size = out.shape
        out = out.reshape(batch_size * seq_len, hidd_size)
        out = self.batchnorm(out)
        out = out.reshape(batch_size, seq_len, hidd_size)

        # Drop out between layers
        out = self.dropout(out)
        # Second LSTM layer
        out, (h2, c2) = self.lstm2(out, (h2, c2))
        out = self.layers(out)
        out = out[:, -1, :56]
        return out, h1, c1, h2, c2

Attention

In [34]:
class AttentionBiLSTM(nn.Module):
    def __init__(self, inputdim, outputdim, numheads, layerdim, dropout):
        super(AttentionBiLSTM, self).__init__()
        self.layerdim = layerdim
        
        self.embedding = nn.Embedding(num_embeddings=25, embedding_dim=1)

        self.lstm1 = nn.LSTM(inputdim, 120, layerdim, batch_first=True, bidirectional=True)
        self.batchnorm = nn.LayerNorm(240)
        self.dropout = nn.Dropout(dropout)
        self.lstm2 = nn.LSTM(240, 60, layerdim, batch_first=True, bidirectional=True)
        self.attention = nn.MultiheadAttention(embed_dim=120, num_heads=numheads, batch_first=True)
        self.layers = nn.Sequential(
            nn.Linear(120,56),
            nn.LayerNorm(56),
            nn.ReLU(),
            nn.Linear(56, 32),
            nn.LayerNorm(32),
            nn.ReLU(),
            nn.Linear(32, outputdim)
        )
    
    def forward(self, x, h1=None, c1=None, h2=None, c2=None):
        if h1 is None or c1 is None or h2 is None or c2 is None:
            h1 = torch.zeros(self.layerdim*2, x.size(0), 120)
            c1 = torch.zeros(self.layerdim*2, x.size(0), 120)
            h2 = torch.zeros(self.layerdim*2, x.size(0), 60)
            c2 = torch.zeros(self.layerdim*2, x.size(0), 60)
        
        # Embedding
        emb = x[:, :, 5].to(torch.long)
        
        embed = self.embedding(emb).to(torch.float32)
        x = x[:, :, :5]
        
        x = torch.cat([x, embed], dim=2)

        # First LSTM
        out,(h1, c1) = self.lstm1(x, (h1,c1))

        # print("First ", out.mean().item(), "std", out.std().item())
        # Batch Normilization
        batch_size, seq_len, hidd_size = out.shape
        out = out.reshape(batch_size * seq_len, hidd_size)
        out = self.batchnorm(out)
        out = out.reshape(batch_size, seq_len, hidd_size)

        # Drop out between layers
        out = self.dropout(out)

        # Second LSTM layer
        out, (h2, c2) = self.lstm2(out, (h2, c2))
        # print("second ", out.mean().item(), "std", out.std().item())

        # Add attention layer
        out, attn_weights = self.attention(query=out, key=out,value=out)
        
        # print("after attention ", out.mean().item(), "std", out.std().item())

        # last time step output
        out = out[:, -1, :]
        
        # Final dense layers
        out = self.dropout(out)
        out = self.layers(out)
        # print("last ", out.mean().item(), "std", out.std().item())
        
        return out, h1, c1, h2, c2

model

In [35]:
# Model
# model = LSTM(inputdim=4, outputdim=1, layerdim=1, dropout=0.2)  # NON Bidirectional
# model = BiLSTM(inputdim=5, outputdim=1, layerdim=1, dropout=0.2)  # Bi Directional
model = AttentionBiLSTM(inputdim=6, outputdim=1,numheads=4, layerdim=1, dropout=0.2) # Bi Directional with Attention
loss_fcn = nn.SmoothL1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)


In [36]:
# Training
# print(len(train_batchs))
h1, c1, h2, c2 = None, None, None, None
epochs = 5
model.train()
for epoch in range(epochs):
    epoch_loss = 0

    for batch in train_batchs:
        optimizer.zero_grad() # Reset your gradient
        # Create sequences
        X_train, y_train = createSequences(batch, 30)
        y_train = y_train.reshape(-1,1)
        X_train = X_train.float()
        # print(X_train.shape, y_train.shape)

        # Train
        pred, h1, c1, h2, c2 = model(X_train, h1, c1, h2, c2)
        # print(pred)

        loss = loss_fcn(pred, y_train)
        loss.backward(retain_graph=True)  
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0) # gradient clipping

        epoch_loss += loss.item()

        h1 = h1.detach()
        c1 = c1.detach()
        h2 = h2.detach()
        c2 = c2.detach()
        optimizer.step()
    
    print(f"Epoch {epoch+1}/{epochs}, loss {epoch_loss:.5f}")


Epoch 1/5, loss 3.28739
Epoch 2/5, loss 0.11496
Epoch 3/5, loss 0.08946
Epoch 4/5, loss 0.08143
Epoch 5/5, loss 0.07773


In [37]:
y_test_list, y_pred_list = [], []
h1, c1, h2, c2 = None, None, None, None
model.eval()
with torch.no_grad():
    for batch in test_batchs:
        X_test, y_test= createSequences(batch, 30)
        y_test = y_test.reshape(-1,1)
        X_test = X_test.float()

        # print(X_test.shape)

        y_pred, h1, c1, h2, c2 = model(X_test, h1, c1, h2, c2)
        
        y_pred_list.append(y_pred)
        y_test_list.append(y_test)

y_pred_list = np.array(y_pred_list).flatten()
y_test_list = np.array(y_test_list).flatten()

y_pred_list = inverse_Z_Score(y_pred_list)
y_test_list = inverse_Z_Score(y_test_list)

data_verify = pd.DataFrame(y_test_list.tolist(), columns=["Test"])
data_predicted = pd.DataFrame(y_pred_list.tolist(),columns=['Predictions'])

final_output = pd.concat([data_verify, data_predicted], axis=1)
final_output['difference'] = final_output['Test'] - final_output['Predictions']
# final_output.to_csv()
print(final_output)

total_difference = np.sum(np.abs(final_output['difference']))
pred_dev = np.std(final_output['Predictions'])
pred_mean = np.mean(final_output['Predictions'])

print(f"Average Difference: {total_difference/len(final_output['difference']):.5f}")
print(f"Standard deviation for the predictions: {pred_dev:.5f}")
print(f"Prediction mean: {pred_mean:.5f}")
# print(final_output['Predictions'])

            Test  Predictions  difference
0    -157.000977   -86.618164  -70.382812
1      15.000000   -85.020508  100.020508
2     -18.000000   -82.805664   64.805664
3     -59.000977   -80.093750   21.092773
4     -60.000000   -77.344727   17.344727
...          ...          ...         ...
6265  272.000000   -17.572266  289.572266
6266  189.999023   -16.669922  206.668945
6267  -23.000977   -15.331055   -7.669922
6268  119.999023   -13.810547  133.809570
6269  -46.000000   -12.182617  -33.817383

[6270 rows x 3 columns]
Average Difference: 195.65031
Standard deviation for the predictions: 46.53130
Prediction mean: -11.14610
