In [None]:
# Import necessary libraries
import pandas as pd   # For data manipulation
import numpy as np    # For numerical operations
import torch          # PyTorch library for deep learning
import torch.nn as nn # Neural network module in PyTorch

from sklearn.preprocessing import MinMaxScaler  # Data normalization
from torch.nn.utils.rnn import pad_sequence  # Helps in padding sequences (not used here)
from torch.utils.data import TensorDataset, DataLoader  # Dataset and batch handling

# Import custom LSTM and BiLSTM models from an external file (LSTM.py)
from LSTM import LSTM, BiLSTM

# -------------------------- Load and Normalize Data -------------------------- #

# Load the dataset from CSV file
data = pd.read_csv("filename.csv")  

# Extract relevant features (EXCLUDING 'delay')
data = data[["scheduled_time", "day", "day_of_year", "Weather"]].values  # Now input has 4 features

# Store target variable separately
target = pd.read_csv("filename.csv")["delay"].values  # Load delay separately

# Perform Z-score normalization (standardization)
mean = np.mean(data)       # Compute mean of the dataset
std_dev = np.std(data)     # Compute standard deviation
scaled_data = (data - mean) / std_dev  # Normalize the data

# -------------------------- Batch Processing -------------------------- #

# Create batches using PyTorch DataLoader
batchs = DataLoader(dataset=scaled_data, batch_size=60, shuffle=False)  
# Each batch will contain 60 samples, and shuffle is set to False to maintain order

# -------------------------- Sequence Creator Function -------------------------- #

def createSequences(data, seq_length):
    """
    Converts time-series data into sequences of length `seq_length`.
    Inputs:
        - data: The time-series dataset
        - seq_length: Number of timesteps per sequence (e.g., 30)
    Outputs:
        - x: A tensor of sequences (features)
        - y: A tensor of target values (labels)
    """
    x, y = [], []
    
    for i in range(len(data) - seq_length):  # Iterate through dataset
        if isinstance(data, np.ndarray):  # If data is a NumPy array
            x_data = torch.from_numpy(data[i:(seq_length + i)].astype(np.float32))
        else:  # If data is already a PyTorch tensor
            x_data = data[i:(seq_length + i)].clone().detach()

        y_data = torch.tensor(target[seq_length + i], dtype=torch.float32)  # Convert to tensor
        
        if len(x_data) == 30:  # Ensure the sequence length is exactly 30
            x.append(x_data)
            y.append(y_data)
    
    return torch.stack(x, dim=0), torch.stack(y, dim=0)  # Convert lists to tensors

# -------------------------- Model Selection -------------------------- #

# Uncomment one of the models below based on whether you want a standard LSTM or BiLSTM
# model = LSTM(inputdim=4, outputdim=1, layerdim=1, dropout=0.2)  # Non-bidirectional LSTM
model = BiLSTM(inputdim=4, outputdim=1, layerdim=1, dropout=0.2)  # Bi-directional LSTM

# -------------------------- Loss Function and Optimizer -------------------------- #

loss_fcn = nn.L1Loss()  # Mean Absolute Error (L1 Loss)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)  # Adam optimizer with learning rate 0.01

# -------------------------- Initialize Hidden and Cell States -------------------------- #

h1, c1, h2, c2 = None, None, None, None  # Hidden and cell states for BiLSTM (LSTM would only use h1, c1)

# -------------------------- Training Loop -------------------------- #

for batch in batchs:  # Iterate through each batch of 60 samples

    # Convert batch data into sequences for training
    X_train, y_train = createSequences(batch, 30)  # Generate sequences of length 30
    y_train = y_train.reshape(-1, 1)  # Reshape labels to be a column vector
    X_train = X_train.float()  # Convert data to PyTorch float tensor

    # Set the number of epochs (iterations over the dataset)
    epochs = 5  
    h1, c1, h2, c2 = None, None, None, None  # Reset hidden states at the start of each batch

    for epoch in range(epochs):  # Iterate through epochs

        # Forward pass: Get predictions from the model
        pred, h1, c1, h2, c2 = model(X_train, h1, c1, h2, c2)

        # Compute loss (error between predicted and actual delay values)
        loss = loss_fcn(pred, y_train)  
        print(f"Epoch {epoch}: Loss = {loss.item()}")  # Print loss for monitoring training progress

        # Backward pass (gradient computation and optimization)
        optimizer.zero_grad()  # Clear previous gradients
        loss.backward(retain_graph=True)  # Compute gradients and retain computational graph
        optimizer.step()  # Update model parameters using gradients

        # Detach hidden states from computation graph to prevent memory buildup
        h1 = h1.detach()
        c1 = c1.detach()
        h2 = h2.detach()
        c2 = c2.detach()


In [17]:
# Import necessary libraries
import pandas as pd  
import numpy as np    
import torch          
import torch.nn as nn 

from sklearn.preprocessing import MinMaxScaler  
from torch.utils.data import TensorDataset, DataLoader  
from LSTM import LSTM, BiLSTM  # Import custom LSTM/BiLSTM models

# -------------------------- Load and Normalize Data -------------------------- #

# Load dataset from CSV file
data = pd.read_csv("filename.csv")  

# Extract relevant features (EXCLUDING 'delay')
features = data[["scheduled_time", "day", "day_of_year", "Weather"]].values
target = data["delay"].values  # Store target variable separately

# Perform Z-score normalization (standardization)
mean = np.mean(features)       
std_dev = np.std(features)     
scaled_features = (features - mean) / std_dev  

# Convert NumPy arrays to PyTorch tensors
scaled_features = torch.tensor(scaled_features, dtype=torch.float32)
target = torch.tensor(target, dtype=torch.float32)

# Combine features & target into a dataset
dataset = TensorDataset(scaled_features, target)

# Create DataLoader for batching
batchs = DataLoader(dataset, batch_size=60, shuffle=False)

# -------------------------- Sequence Creator Function -------------------------- #

def createSequences(data, target, seq_length):
    """
    Converts time-series data into sequences of length `seq_length`.
    Inputs:
        - data: Input features
        - target: Target variable (delay)
        - seq_length: Number of timesteps per sequence (e.g., 30)
    Outputs:
        - x: Tensor of sequences (features)
        - y: Tensor of target values (labels)
    """
    x, y = [], []
    
    for i in range(len(data) - seq_length):
        if isinstance(data, np.ndarray):  # If data is a NumPy array
            x_data = torch.from_numpy(data[i:(seq_length + i)].astype(np.float32))
        else:  # If data is already a PyTorch tensor
            x_data = data[i:(seq_length + i)].clone().detach()  # Prevent unnecessary copy

        y_data = target[seq_length + i]  

        if len(x_data) == seq_length:
            x.append(x_data)  # Append directly (no need for torch.tensor)
            y.append(y_data)  # Append directly (no need for torch.tensor)
    
    return torch.stack(x), torch.stack(y)
# -------------------------- Model Selection -------------------------- #

# Uncomment one of the models below based on whether you want a standard LSTM or BiLSTM
# model = LSTM(inputdim=4, outputdim=1, layerdim=1, dropout=0.2)  
model = BiLSTM(inputdim=4, outputdim=1, layerdim=1, dropout=0.2)  

# -------------------------- Loss Function and Optimizer -------------------------- #

loss_fcn = nn.L1Loss()  
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)  

# -------------------------- Initialize Hidden and Cell States -------------------------- #

h1, c1, h2, c2 = None, None, None, None  

# -------------------------- Training Loop -------------------------- #

for batch in batchs:  
    batch_features, batch_targets = batch  # Unpack batch into features & target

    # Convert batch data into sequences for training
    X_train, y_train = createSequences(batch_features, batch_targets, 30)  
    y_train = y_train.unsqueeze(1)  # Ensures correct shape
    X_train = X_train.float()  

    # Training hyperparameters
    epochs = 5  
    h1, c1, h2, c2 = None, None, None, None  

    for epoch in range(epochs):  

        # Forward pass: Get predictions from the model
        pred, h1, c1, h2, c2 = model(X_train, h1, c1, h2, c2)

        # Compute loss (error between predicted and actual delay values)
        loss = loss_fcn(pred, y_train)  
        print(f"Epoch {epoch}: Loss = {loss.item()}")  

        # Backward pass (gradient computation and optimization)
        optimizer.zero_grad()  
        loss.backward(retain_graph=True)  
        optimizer.step()  

        # Detach hidden states from computation graph to prevent memory buildup
        h1 = h1.detach()
        c1 = c1.detach()
        h2 = h2.detach()
        c2 = c2.detach()

Epoch 3: Loss = 95.3957748413086
Epoch 4: Loss = 95.12748718261719
Epoch 0: Loss = 305.2689514160156
Epoch 1: Loss = 301.11505126953125
Epoch 2: Loss = 299.9402770996094
Epoch 3: Loss = 299.00927734375
Epoch 4: Loss = 298.2845458984375
Epoch 0: Loss = 252.47926330566406
Epoch 1: Loss = 256.65130615234375
Epoch 2: Loss = 258.3968505859375
Epoch 3: Loss = 258.2491760253906
Epoch 4: Loss = 257.4620361328125
Epoch 0: Loss = 320.70257568359375
Epoch 1: Loss = 321.4596252441406
Epoch 2: Loss = 322.01776123046875
Epoch 3: Loss = 320.23394775390625
Epoch 4: Loss = 317.9660339355469
Epoch 0: Loss = 226.72930908203125
Epoch 1: Loss = 226.84999084472656
Epoch 2: Loss = 226.36080932617188
Epoch 3: Loss = 226.00079345703125
Epoch 4: Loss = 225.87742614746094
Epoch 0: Loss = 246.20452880859375


KeyboardInterrupt: 

MODEL

In [None]:
# Import necessary libraries
import pandas as pd  
import numpy as np    
import torch          
import torch.nn as nn 

from sklearn.preprocessing import MinMaxScaler  
from torch.utils.data import TensorDataset, DataLoader  
from LSTM import LSTM, BiLSTM  # Import custom LSTM/BiLSTM models

# -------------------------- Load and Normalize Data -------------------------- #

# Load dataset from CSV file
data = pd.read_csv("filename.csv")  

# Extract relevant features (EXCLUDING 'delay')
features = data[["scheduled_time", "day", "day_of_year", "Weather"]].values
target = data["delay"].values  # Store target variable separately

# Perform Z-score normalization (standardization)
mean = np.mean(features)       
std_dev = np.std(features)     
scaled_features = (features - mean) / std_dev


# Convert NumPy arrays to PyTorch tensors
scaled_features = torch.tensor(scaled_features, dtype=torch.float32)
target = torch.tensor(target, dtype=torch.float32)

# -------------------------- Sequence Creator Function -------------------------- #

def createSequences(data, target, seq_length):
    """
    Converts time-series data into sequences of length `seq_length`.
    Inputs:
        - data: Input features
        - target: Target variable (delay)
        - seq_length: Number of timesteps per sequence (e.g., 30)
    Outputs:
        - x: Tensor of sequences (features)
        - y: Tensor of target values (labels)
    """
    x, y = [], []
    
    for i in range(len(data) - seq_length):
        x_data = data[i:(seq_length + i)].clone().detach()  # Prevent unnecessary copy
        y_data = target[seq_length + i]  

        if len(x_data) == seq_length:
            x.append(x_data)
            y.append(y_data)
    
    return torch.stack(x), torch.stack(y)

# -------------------------- Preprocess Sequences Before Training -------------------------- #

seq_length = 30  # Set sequence length

# Generate sequences once before training
X_train, y_train = createSequences(scaled_features, target, seq_length)

# Ensure target shape is correct
if y_train.dim() == 1:
    y_train = y_train.unsqueeze(1)  # Only reshape if needed

# Create DataLoader with sequences instead of raw data
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=60, shuffle=True)

# -------------------------- Model Selection -------------------------- #

# Uncomment one of the models below based on whether you want a standard LSTM or BiLSTM
# model = LSTM(inputdim=4, outputdim=1, layerdim=1, dropout=0.2)  
model = BiLSTM(inputdim=4, outputdim=1, layerdim=1, dropout=0.2)  

# -------------------------- Loss Function and Optimizer -------------------------- #

loss_fcn = nn.L1Loss()  
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)  

# -------------------------- Initialize Hidden and Cell States -------------------------- #

h1, c1, h2, c2 = None, None, None, None  

# -------------------------- Training Loop -------------------------- #

for batch in train_loader:  
    X_train, y_train = batch  # Unpack preprocessed sequences

    # Convert tensors to float
    X_train = X_train.float()  
    y_train = y_train.float()  

    # Training hyperparameters
    epochs = 5  
    h1, c1, h2, c2 = None, None, None, None  

    for epoch in range(epochs):  

        # Forward pass: Get predictions from the model
        pred, h1, c1, h2, c2 = model(X_train, h1, c1, h2, c2)

        # Compute loss (error between predicted and actual delay values)
        loss = loss_fcn(pred, y_train)  
        print(f"Epoch {epoch}: Loss = {loss.item()}")  

        # Backward pass (gradient computation and optimization)
        optimizer.zero_grad()  
        loss.backward()  # No need for retain_graph=True
        optimizer.step()  

        # Detach hidden states from computation graph to prevent memory buildup
        h1 = h1.detach()
        c1 = c1.detach()
        h2 = h2.detach()
        c2 = c2.detach()


Epoch 0: Loss = 139.8643341064453
Epoch 1: Loss = 139.8562469482422
Epoch 2: Loss = 139.84005737304688
Epoch 3: Loss = 139.8239288330078
Epoch 4: Loss = 139.79754638671875
Epoch 0: Loss = 223.61636352539062
Epoch 1: Loss = 223.42901611328125
Epoch 2: Loss = 222.66514587402344
Epoch 3: Loss = 221.70867919921875
Epoch 4: Loss = 220.3386688232422
Epoch 0: Loss = 156.2718048095703
Epoch 1: Loss = 155.46788024902344
Epoch 2: Loss = 154.62765502929688
Epoch 3: Loss = 153.92135620117188
Epoch 4: Loss = 152.8518524169922
Epoch 0: Loss = 264.13079833984375
Epoch 1: Loss = 278.2731628417969
Epoch 2: Loss = 269.2742614746094
Epoch 3: Loss = 265.1445007324219
Epoch 4: Loss = 260.8049011230469
Epoch 0: Loss = 157.0080108642578
Epoch 1: Loss = 156.26385498046875
Epoch 2: Loss = 158.70382690429688
Epoch 3: Loss = 156.88092041015625
Epoch 4: Loss = 154.3319549560547
Epoch 0: Loss = 205.6983184814453
Epoch 1: Loss = 182.06759643554688
Epoch 2: Loss = 178.1352996826172
Epoch 3: Loss = 173.9292755126953


MODEL WITH SCALED TARGET AND MSE LOSS (MAYBE) AND DELAY IN MINUTES

In [None]:
# Import necessary libraries
import pandas as pd  
import numpy as np    
import torch          
import torch.nn as nn 

from sklearn.preprocessing import MinMaxScaler  
from torch.utils.data import TensorDataset, DataLoader  
from LSTM import LSTM, BiLSTM  # Import custom LSTM/BiLSTM models

# -------------------------- Load and Normalize Data -------------------------- #

# Load dataset from CSV file
data = pd.read_csv("filename.csv")  

# Extract relevant features (EXCLUDING 'delay')
features = data[["scheduled_time", "day", "day_of_year", "Weather"]].values
target = data["delay"].values / 60 # Store target variable separately

# Perform Z-score normalization (standardization)
mean = np.mean(features)       
std_dev = np.std(features)     
scaled_features = (features - mean) / std_dev

# Convert NumPy arrays to PyTorch tensors
scaled_features = torch.tensor(scaled_features, dtype=torch.float32)
target = torch.tensor(target, dtype=torch.float32)

# -------------------------- Sequence Creator Function -------------------------- #

def createSequences(data, target, seq_length):
    """
    Converts time-series data into sequences of length `seq_length`.
    Inputs:
        - data: Input features
        - target: Target variable (delay)
        - seq_length: Number of timesteps per sequence (e.g., 30)
    Outputs:
        - x: Tensor of sequences (features)
        - y: Tensor of target values (labels)
    """
    x, y = [], []
    
    for i in range(len(data) - seq_length):
        x_data = data[i:(seq_length + i)].clone().detach()  # Prevent unnecessary copy
        y_data = target[seq_length + i]  

        if len(x_data) == seq_length:
            x.append(x_data)
            y.append(y_data)
    
    return torch.stack(x), torch.stack(y)

# -------------------------- Preprocess Sequences Before Training -------------------------- #

seq_length = 30  # Set sequence length

# Generate sequences once before training
X_train, y_train = createSequences(scaled_features, target, seq_length)

# Ensure target shape is correct
if y_train.dim() == 1:
    y_train = y_train.unsqueeze(1)  # Only reshape if needed

# Create DataLoader with sequences instead of raw data
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=60, shuffle=True)

# -------------------------- Model Selection -------------------------- #

# Uncomment one of the models below based on whether you want a standard LSTM or BiLSTM
# model = LSTM(inputdim=4, outputdim=1, layerdim=1, dropout=0.2)  
model = BiLSTM(inputdim=4, outputdim=1, layerdim=1, dropout=0.2)  

# -------------------------- Loss Function and Optimizer -------------------------- #

loss_fcn = nn.L1Loss()  
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)  

# -------------------------- Initialize Hidden and Cell States -------------------------- #

h1, c1, h2, c2 = None, None, None, None  

# -------------------------- Training Loop -------------------------- #

epochs = 5  
for epoch in range(epochs):
    h1, c1, h2, c2 = None, None, None, None  
    for batch in train_loader:  
        X_train, y_train = batch  # Unpack preprocessed sequences

        # Convert tensors to float
        X_train = X_train.float()  
        y_train = y_train.float()  

        # Training hyperparameter

        # Forward pass: Get predictions from the model
        pred, h1, c1, h2, c2 = model(X_train, h1, c1, h2, c2)

        # Compute loss (error between predicted and actual delay values)
        loss = loss_fcn(pred, y_train)  
        print(f"Epoch {epoch}: Loss = {loss.item()}")  

        # Backward pass (gradient computation and optimization)
        optimizer.zero_grad()  
        loss.backward()  # No need for retain_graph=True
        optimizer.step()  

        # Detach hidden states from computation graph to prevent memory buildup
        h1 = h1.detach()
        c1 = c1.detach()
        h2 = h2.detach()
        c2 = c2.detach()


Epoch 0: Loss = 3.072319269180298
Epoch 1: Loss = 3.0474355220794678
Epoch 2: Loss = 2.9933674335479736
Epoch 3: Loss = 2.9812159538269043
Epoch 4: Loss = 2.994694948196411
Epoch 0: Loss = 4.209108829498291
Epoch 1: Loss = 4.19798469543457
Epoch 2: Loss = 4.202397346496582
Epoch 3: Loss = 4.171690464019775
Epoch 4: Loss = 4.2162299156188965
Epoch 0: Loss = 4.127368450164795
Epoch 1: Loss = 4.108709812164307
Epoch 2: Loss = 4.043426036834717
Epoch 3: Loss = 4.057947635650635
Epoch 4: Loss = 3.9549012184143066
Epoch 0: Loss = 3.1647746562957764
Epoch 1: Loss = 3.181128740310669
Epoch 2: Loss = 3.1189095973968506
Epoch 3: Loss = 3.1052846908569336
Epoch 4: Loss = 3.0869245529174805
Epoch 0: Loss = 3.067998170852661
Epoch 1: Loss = 3.0961174964904785
Epoch 2: Loss = 2.980842113494873
Epoch 3: Loss = 2.996847629547119
Epoch 4: Loss = 2.977745532989502
Epoch 0: Loss = 3.412574291229248
Epoch 1: Loss = 3.4293177127838135
Epoch 2: Loss = 3.4100584983825684
Epoch 3: Loss = 3.3926119804382324
Ep

KeyboardInterrupt: 