In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Input files: groundwater levels (AquiMod export) and meteorological drivers (UKCP18 export).
p_gwl = Path("../data/AquiMod_simobs_Gretna.csv")
p_met = Path("../data/ukcp18_simobs_Gretna.csv")

# Both files are read with identical date-parsing options.
df_gwl, df_met = (
    pd.read_csv(csv_path, parse_dates=["Date"], dayfirst=True)
    for csv_path in (p_gwl, p_met)
)

# Keep only rows present in both tables, then drop any row with a missing value.
df_data = df_met.merge(df_gwl, how="inner", on=["Borehole", "Model", "Date"]).dropna()
df_data

Unnamed: 0,Borehole,Model,Date,precipwsnow,PET,Sim,Obs
11419,Gretna,AquiMod,1993-04-07,0.096710,1.530000,39.9447,40.084
11420,Gretna,AquiMod,1993-04-08,22.228661,1.530000,39.9812,40.082
11421,Gretna,AquiMod,1993-04-09,9.274128,1.530000,40.0087,40.106
11422,Gretna,AquiMod,1993-04-10,0.089421,1.530000,40.0106,40.121
11423,Gretna,AquiMod,1993-04-11,1.071286,1.530000,40.0064,40.135
...,...,...,...,...,...,...,...
20814,Gretna,AquiMod,2018-12-27,0.153541,0.248387,39.9721,39.959
20815,Gretna,AquiMod,2018-12-28,1.296672,0.248387,39.9695,39.953
20816,Gretna,AquiMod,2018-12-29,1.978836,0.248387,39.9697,39.950
20817,Gretna,AquiMod,2018-12-30,0.029849,0.248387,39.9671,39.941


Plot the merged dataset below: observed groundwater level, precipitation and PET against date.

In [3]:
# One interactive line chart per series against date: the observed groundwater
# level ("Obs"), then the two meteorological inputs ("precipwsnow" and "PET").
fig1 = px.line(df_data, x="Date", y="Obs")
fig1.show()
fig2 = px.line(df_data, x="Date", y="precipwsnow")
fig2.show()
fig3 = px.line(df_data, x="Date", y="PET")
fig3.show()

In [4]:
precip = df_data["precipwsnow"].values
pet = df_data["PET"].values
gwl = df_data["Obs"].values

In [5]:
# Concatenate the features
features_arr = np.column_stack((precip, pet))
features_arr.shape

(9400, 2)

In [6]:
# Normalize the features
# Each feature column is rescaled independently to the range [-1, 1].
# NOTE(review): the scaler is fitted on the full series, before the train/test
# split further down, so the test period influences the scaling (mild data
# leakage). Consider fitting on the training rows only and calling
# `scaler.transform` on the rest — confirm whether this matters for the evaluation.
scaler = MinMaxScaler(feature_range=(-1, 1))
features_scaled_arr = scaler.fit_transform(features_arr)
features_scaled_arr.shape

(9400, 2)

In [None]:
# # Normalise the target
# (Disabled.) NOTE(review): `gwl` is a 1-D array and scikit-learn scalers expect
# 2-D input, so if this is re-enabled it needs `gwl.reshape(-1, 1)`.
# target_scaler = MinMaxScaler(feature_range=(-1, 1))
# gwl_scaled_arr = target_scaler.fit_transform(gwl)

In [7]:
def create_sequences(data, seq_length):
    """
    Transforms time-series data into overlapping sequences of a specified length.

    Sequence `i` holds rows `i` to `i + seq_length - 1` of `data`, so consecutive
    sequences are shifted by one time step.

    Parameters:
    data (np.array): A 2D numpy array where each row is a time step and each column is a feature.
    seq_length (int): The number of time steps to include in each output sequence. Must be at least 1.

    Returns:
    np.array: A 3D numpy array of shape (num_samples - seq_length + 1, seq_length, num_features).
        When `data` has fewer than `seq_length` rows the result is empty, with
        shape (0, seq_length, num_features).

    Raises:
    ValueError: If `seq_length` is smaller than 1.
    """
    # A zero or negative length would otherwise produce empty or wrapped-around slices.
    if seq_length < 1:
        raise ValueError(f"seq_length must be at least 1, got {seq_length}")

    data = np.asarray(data)
    num_sequences = len(data) - seq_length + 1

    # Too little data for even one sequence: keep the documented rank so that
    # downstream shape handling still works.
    if num_sequences <= 0:
        return np.empty((0, seq_length) + data.shape[1:], dtype=data.dtype)

    # Stack every window of `seq_length` consecutive rows along a new first axis.
    return np.stack([data[start:start + seq_length] for start in range(num_sequences)])

seq_length = 20
features_seq_arr = create_sequences(features_scaled_arr, seq_length)
# Drop the first `seq_length - 1` targets so that target i lines up with the
# last time step of feature sequence i (earlier time steps have no full window).
gwl_arr = gwl[seq_length - 1:]
print(features_seq_arr.shape)
print(gwl_arr.shape)

(9381, 20, 2)
(9381,)


In [8]:
# Convert to PyTorch tensors
# Note that the target variable, GWL, does not need rescaling
# NOTE(review): the target is left in its original units (about 40 in the data
# preview); the strongly negative NSE values printed by the first training run
# suggest the network is slow to learn that offset — consider centring or
# scaling the target. Confirm.
features_tensor: torch.Tensor = torch.from_numpy(features_seq_arr).float()
# `unsqueeze(1)` turns the 1-D target vector into a column, one row per sample.
gwl_tensor: torch.Tensor = torch.from_numpy(gwl_arr).float().unsqueeze(1)
print(features_tensor.shape)
print(gwl_tensor.shape)

torch.Size([9381, 20, 2])
torch.Size([9381, 1])


In [9]:
# Split into training and test sets
# The first 80% of the samples are used for training and the remainder for
# testing. Slicing (rather than random sampling) keeps the samples in their
# original order.
train_size = int(len(features_tensor) * 0.8)
# NOTE(review): `test_size` is not used by any of the cells shown here.
test_size = len(features_tensor) - train_size

features_train = features_tensor[:train_size]
features_test = features_tensor[train_size:]
gwl_train  = gwl_tensor[:train_size]
gwl_test = gwl_tensor[train_size:]

print(f"{features_train.shape}: {gwl_train.shape}")
print(f"{features_test.shape}: {gwl_test.shape}")

torch.Size([7504, 20, 2]): torch.Size([7504, 1])
torch.Size([1877, 20, 2]): torch.Size([1877, 1])


In [10]:
features_train.shape

torch.Size([7504, 20, 2])

**CAUTION** the following code block is taken from another video tutorial

When you initialise an instance of `TimeSeriesDataset`, you pass in `X` and `y`. In this notebook, `X` is a 3D tensor of shape `(num_samples, seq_length, num_features)`: each entry along the first dimension is a separate sample, and each sample is a window of `seq_length` time steps with `num_features` values per step. `y` holds one target value per sample.

The `__getitem__` method returns the i-th sample and its corresponding target value. This will be a tuple, where the first element is a 2D tensor of shape `(seq_length, num_features)` (the input window for the i-th sample) and the second element is the target value.

When you use this dataset to train your LSTM, each sample (which could contain multiple features) will be an input to the LSTM. Just ensure that the input_size parameter of your LSTM matches the number of features in your data. 😊

In [11]:
class TimeSeriesDataset(Dataset):
    """Pairs each input sample with its target so a DataLoader can batch them.

    Parameters:
    X: Indexable collection of input samples; `X[i]` is the i-th sample.
    y: Indexable collection of targets; `y[i]` is the target for `X[i]`.

    Raises:
    ValueError: If `X` and `y` do not contain the same number of samples.
    """

    def __init__(self, X, y):
        # The dataset length is taken from X, so a shorter y would only fail
        # with an IndexError part-way through an epoch and a longer y would be
        # silently truncated. Fail early instead.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of samples, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y

    def __len__(self):
        """Return the number of (sample, target) pairs."""
        return len(self.X)

    def __getitem__(self, i):
        """Return the i-th sample and its target as a `(X[i], y[i])` tuple."""
        return self.X[i], self.y[i]

# Wrap the train/test tensors so that a DataLoader can serve (sample, target) pairs.
train_dataset = TimeSeriesDataset(features_train, gwl_train)
test_dataset = TimeSeriesDataset(features_test, gwl_test)


batch_size = 16

# NOTE(review): with shuffle=False every training batch holds `batch_size`
# consecutive samples. If the loss divides by the within-batch target variance
# (as an NSE-style loss does), such batches can have a near-zero denominator —
# consider shuffle=True for the training loader. Confirm.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Explore the dataloader
# Only the first batch is inspected: its container type, its length, and the
# shapes of its two elements (features, then targets).
for batch in train_loader:
    print(type(batch), ": ", len(batch))
    print(batch[0].shape)
    print(batch[1].shape)
    break

<class 'list'> :  2
torch.Size([16, 20, 2])
torch.Size([16, 1])


`nn.Linear` is a class in PyTorch that applies a linear transformation to the incoming data. It's often referred to as a fully connected layer or a dense layer in neural networks. The transformation it applies is: `y = xA^T + b`, where `x` is the input, `A` is the weight matrix, `b` is the bias, and `y` is the output.

In your LSTM model, the `nn.Linear` layer is used at the end of the network. The reason for this is to transform the output of the LSTM layers to the desired output shape. 

In the case of your code, the LSTM layers output a tensor of shape `(batch_size, seq_length, hidden_size)`. Only the output at the last time step, `out[:, -1, :]` with shape `(batch_size, hidden_size)`, is passed to the `nn.Linear` layer, which transforms it to `(batch_size, output_size)`. In your specific case, `output_size` is 1 because you're doing a regression task (predicting groundwater levels), so you want a single continuous value as output for each sequence in the batch.

So, the LSTM layers learn the temporal dynamics of the data, and the final linear layer maps these learned features to the target variable (groundwater levels in your case). This is a common architecture in many tasks involving sequence data. 😊

In the context of Long Short-Term Memory (LSTM) networks, `h` and `c` represent the hidden state and the cell state, respectively.

- **Hidden State (`h`)**: This is the output of the LSTM unit. It's a function of the current input, the previous hidden state, and the updated cell state. The hidden state can be used for predictions, and is also passed to the LSTM unit at the next time step.

- **Cell State (`c`)**: This is the "memory" of the LSTM unit. It stores long-term information. The cell state is updated at each time step by removing (forgetting) irrelevant parts and adding (storing) relevant parts of the current input and the previous hidden state.

The ability to update and manipulate these states allows the LSTM to learn and remember over long sequences, and to mitigate the vanishing-gradient problem that affects traditional recurrent neural networks (RNNs). Exploding gradients can still occur and are usually handled separately, for example with gradient clipping. 😊

The batch_first argument in nn.LSTM is a boolean flag that changes the expected input format.

If batch_first is set to False (which is the default), the expected input shape is (seq_len, batch, input_size). Here, seq_len is the length of the sequence, batch is the batch size, and input_size is the number of features.

If batch_first is set to True, the expected input shape is (batch, seq_len, input_size). This is often more convenient and matches the ordering used by other parts of PyTorch, like the nn.Linear layer.

In your code, batch_first=True is used, which means your LSTM expects input tensors where the batch size is the first dimension. 😊

The line `self.lstm(x, (h0, c0))` is where the input tensor `x` and the initial hidden and cell states `(h0, c0)` are passed into the LSTM layer.

Here's a breakdown:

- `x` is the input tensor, which should have the shape `(batch_size, seq_length, input_size)` if `batch_first=True`, or `(seq_length, batch_size, input_size)` if `batch_first=False`.

- `(h0, c0)` are the initial hidden and cell states for the LSTM. `h0` and `c0` are both tensors of shape `(num_layers, batch_size, hidden_size)`. They are usually initialized to zeros, but can be set to other values for specific use cases.

The `self.lstm(x, (h0, c0))` call returns two outputs:

1. `out`: A tensor containing the output features from the LSTM. If `batch_first=True`, it will have the shape `(batch_size, seq_length, hidden_size)`. If `batch_first=False`, it will have the shape `(seq_length, batch_size, hidden_size)`.

2. `(hn, cn)`: Tensors containing the hidden and cell states for `t = seq_len`. These are useful when the LSTM is part of a larger network and its final states are needed for further computation.

In your code, only the last time step of the `out` tensor (`out[:, -1, :]`) is passed to a fully connected layer (`self.fc`) to get the final output of the model, while `(hn, cn)` are not used. 😊

In [12]:
# Define the LSTM model
class LSTM(nn.Module):
    """LSTM regressor: stacked LSTM layers followed by a linear read-out.

    Only the LSTM output at the final time step is fed to the linear layer, so
    the model produces one prediction vector per input sequence.

    Parameters:
    input_size (int): Number of features per time step.
    hidden_size (int): Number of hidden units in each LSTM layer.
    num_layers (int): Number of stacked LSTM layers.
    output_size (int): Number of values predicted per sequence.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # batch_first=True: inputs are (batch_size, seq_length, input_size).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Predict from a batch of sequences.

        Parameters:
        x (torch.Tensor): Input of shape (batch_size, seq_length, input_size).

        Returns:
        torch.Tensor: Predictions of shape (batch_size, output_size).
        """
        # Zero initial hidden/cell states, created directly with the input's
        # device and dtype. This avoids a CPU allocation followed by a copy, and
        # keeps the states compatible with the input when it is not float32.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = x.new_zeros(state_shape)
        c0 = x.new_zeros(state_shape)
        out, _ = self.lstm(x, (h0, c0))
        # Keep only the last time step of every sequence for the read-out.
        return self.fc(out[:, -1, :])

In [13]:
# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [14]:
# Initialize the model, loss function, and optimizer
# NOTE(review): with hidden_size=1 and num_layers=1 this model has 22 trainable
# parameters, but the saved output of the parameter-count cell reports 31251,
# which matches hidden_size=50 and num_layers=2. The stored outputs appear to
# come from a different configuration — confirm which one is intended.
model = LSTM(input_size=2, hidden_size=1, num_layers=1, output_size=1).to(device)

class NSELoss(nn.Module):
    """Loss derived from the Nash-Sutcliffe efficiency (NSE).

    Computes sum((targets - predictions)^2) / sum((targets - mean(targets))^2)
    over the batch. This equals `1 - NSE`, so minimising the loss maximises the
    NSE; a perfect fit gives 0.

    Parameters:
    eps (float): Small constant added to the denominator. It keeps the loss
        finite when all targets in a batch are identical (zero variance), which
        would otherwise give inf or nan and corrupt the gradients.
    """

    def __init__(self, eps: float = 1e-8):
        super().__init__()
        self.eps = eps

    def forward(self, predictions: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
        """Return the scalar loss for one batch.

        `predictions` and `targets` are subtracted element-wise, so they should
        have the same shape.
        """
        # Spread of the targets around their batch mean.
        denominator = torch.sum((targets - torch.mean(targets)) ** 2)
        # Squared error of the predictions.
        numerator = torch.sum((targets - predictions) ** 2)
        return numerator / (denominator + self.eps)

# criterion = nn.MSELoss()
criterion = NSELoss()

In [15]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [16]:
for batch_index, batch in enumerate(train_loader):
    print(batch[0].shape)
    print(batch[1].shape)
    break

torch.Size([16, 20, 2])
torch.Size([16, 1])


In [17]:
# Training function
def train_epoch(epoch: int):
    """Run one optimisation pass over `train_loader`.

    Uses the notebook-level `model`, `criterion`, `optimizer`, `train_loader`
    and `device`.

    Parameters:
    epoch (int): Index of the current epoch. Not used by the training step
        itself; kept so existing callers keep working.

    Returns:
    float: Mean of the per-batch losses over the epoch, or nan if the loader
        yielded no batches.
    """
    model.train()
    # Accumulated over the whole epoch. Previously this was reset inside the
    # loop, so it only ever held the last batch's loss.
    running_loss = 0.0
    num_batches = 0
    for batch in train_loader:
        # batch is a list with two elements
        # The first element is the feature array
        # The second element is the GWL array
        x_batch, y_batch = batch[0].to(device), batch[1].to(device)

        optimizer.zero_grad()
        output = model(x_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    if num_batches == 0:
        return float("nan")
    return running_loss / num_batches



In [18]:
# Testing
def test_epoch(epoch: int):
    """Evaluate the model on `test_loader` without updating its weights.

    Uses the notebook-level `model`, `criterion`, `test_loader` and `device`.

    Parameters:
    epoch (int): Index of the current epoch (not used in the computation).

    Returns:
    float: Sum of the per-batch losses divided by the number of batches.
    """
    model.eval()
    batch_losses = []
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for batch in test_loader:
            inputs = batch[0].to(device)
            expected = batch[1].to(device)
            batch_losses.append(criterion(model(inputs), expected).item())
    mean_loss = sum(batch_losses) / len(test_loader)

    # Blank line separating this epoch's output from the previous one.
    print()
    return mean_loss

In [19]:
def nse(predictions: np.ndarray, targets: np.ndarray) -> float:
    """Nash-Sutcliffe efficiency of `predictions` against `targets`.

    Returns 1 minus the ratio of the squared prediction error to the squared
    deviation of the targets from their mean: 1 for a perfect fit, 0 when the
    predictions are no better than the target mean.
    """
    residual_ss = np.sum(np.square(targets - predictions))
    total_ss = np.sum(np.square(targets - np.mean(targets)))
    return 1 - residual_ss / total_ss

In [20]:
num_epochs = 100
test_loss = []

for epoch in range(num_epochs):
    train_epoch(epoch)
    test_loss.append(test_epoch(epoch))

    # Score the full training and test sets, one forward pass each.
    with torch.no_grad():
        train_predicted = model(features_train.to(device)).to('cpu').numpy()
        test_predicted = model(features_test.to(device)).to('cpu').numpy()
    train_nse = nse(train_predicted.flatten(), gwl_train.numpy().flatten())
    test_nse = nse(test_predicted.flatten(), gwl_test.numpy().flatten())
    print(f"Epoch {epoch + 1}, Train NSE: {train_nse}")
    print(f"Epoch {epoch + 1}, Test NSE: {test_nse}")

# The figures are drawn once, from the final epoch's predictions.
if num_epochs > 0:
    # NaN padding places the training series on the left and the testing series
    # on the right of a shared index.
    train_gap = np.full(len(gwl_train), np.nan)
    test_gap = np.full(len(gwl_test), np.nan)
    df = pd.DataFrame(
        {
            "Training Observed": np.concatenate([gwl_train.numpy().flatten(), test_gap]),
            "Training Predicted": np.concatenate([train_predicted.flatten(), test_gap]),
            "Testing Predicted": np.concatenate([train_gap, test_predicted.flatten()]),
            "Testing Observed": np.concatenate([train_gap, gwl_test.numpy().flatten()]),
        }
    )
    fig = px.line(df)
    fig.show()

    # Test loss per epoch.
    fig = px.line(np.array(test_loss))
    fig.show()




Epoch 1, Train NSE: -22770.263671875
Epoch 1, Test NSE: -25287.509765625

Epoch 2, Train NSE: -22121.447265625
Epoch 2, Test NSE: -24567.4296875

Epoch 3, Train NSE: -21320.017578125
Epoch 3, Test NSE: -23679.583984375

Epoch 4, Train NSE: -20645.5
Epoch 4, Test NSE: -22932.181640625

Epoch 5, Train NSE: -20029.072265625
Epoch 5, Test NSE: -22249.06640625

Epoch 6, Train NSE: -19444.9921875
Epoch 6, Test NSE: -21601.751953125

Epoch 7, Train NSE: -18882.330078125
Epoch 7, Test NSE: -20978.14453125

Epoch 8, Train NSE: -18335.6015625
Epoch 8, Test NSE: -20372.1640625

Epoch 9, Train NSE: -17801.75390625
Epoch 9, Test NSE: -19780.44140625

Epoch 10, Train NSE: -17278.96875
Epoch 10, Test NSE: -19200.958984375

Epoch 11, Train NSE: -16766.119140625
Epoch 11, Test NSE: -18632.466796875

Epoch 12, Train NSE: -16262.4931640625
Epoch 12, Test NSE: -18074.17578125

Epoch 13, Train NSE: -15767.6162109375
Epoch 13, Test NSE: -17525.5625

Epoch 14, Train NSE: -15281.1767578125
Epoch 14, Test NSE

It seems that the larger the model, the smaller the learning rate required

In [22]:
# Continue training the same model with a fresh Adam optimizer and a smaller learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)
num_epochs = 50
test_loss = []

for epoch in range(num_epochs):
    train_epoch(epoch)
    test_loss.append(test_epoch(epoch))

    # Score the full training and test sets, one forward pass each.
    with torch.no_grad():
        train_predicted = model(features_train.to(device)).to('cpu').numpy()
        test_predicted = model(features_test.to(device)).to('cpu').numpy()
    train_nse = nse(train_predicted.flatten(), gwl_train.numpy().flatten())
    test_nse = nse(test_predicted.flatten(), gwl_test.numpy().flatten())
    print(f"Epoch {epoch + 1}, Train NSE: {train_nse}")
    print(f"Epoch {epoch + 1}, Test NSE: {test_nse}")

# The figures are drawn once, from the final epoch's predictions.
if num_epochs > 0:
    # NaN padding places the training series on the left and the testing series
    # on the right of a shared index.
    pad_for_train_span = np.full(len(gwl_train), np.nan)
    pad_for_test_span = np.full(len(gwl_test), np.nan)
    df = pd.DataFrame(
        {
            "Training Observed": np.concatenate([gwl_train.numpy().flatten(), pad_for_test_span]),
            "Training Predicted": np.concatenate([train_predicted.flatten(), pad_for_test_span]),
            "Testing Predicted": np.concatenate([pad_for_train_span, test_predicted.flatten()]),
            "Testing Observed": np.concatenate([pad_for_train_span, gwl_test.numpy().flatten()]),
        }
    )
    fig = px.line(df)
    fig.show()

    # Test loss per epoch, on a log scale.
    fig = px.line(np.log(np.array(test_loss)))
    fig.show()


Epoch 1, Train NSE: -0.06480300426483154
Epoch 1, Test NSE: -0.37359070777893066

Epoch 2, Train NSE: -0.06483340263366699
Epoch 2, Test NSE: -0.37366747856140137

Epoch 3, Train NSE: -0.06492435932159424
Epoch 3, Test NSE: -0.37389707565307617

Epoch 4, Train NSE: -0.06503081321716309
Epoch 4, Test NSE: -0.37416529655456543

Epoch 5, Train NSE: -0.06513714790344238
Epoch 5, Test NSE: -0.37443339824676514

Epoch 6, Train NSE: -0.06521308422088623
Epoch 6, Test NSE: -0.37462520599365234

Epoch 7, Train NSE: -0.06528162956237793
Epoch 7, Test NSE: -0.3747977018356323

Epoch 8, Train NSE: -0.06535768508911133
Epoch 8, Test NSE: -0.37498939037323

Epoch 9, Train NSE: -0.06542623043060303
Epoch 9, Test NSE: -0.37516212463378906

Epoch 10, Train NSE: -0.06549477577209473
Epoch 10, Test NSE: -0.37533485889434814

Epoch 11, Train NSE: -0.06554841995239258
Epoch 11, Test NSE: -0.3754690885543823

Epoch 12, Train NSE: -0.06560158729553223
Epoch 12, Test NSE: -0.3756033182144165

Epoch 13, Train

In [78]:
# Compare observed and modelled groundwater levels over the training set.
with torch.no_grad():
    predicted = model(features_train.to(device)).to('cpu').numpy()

df = pd.DataFrame(
    {
        "observed": gwl_train.numpy().flatten(),
        "modelled": predicted.flatten(),
    }
)
fig = px.line(df)
fig.show()

In [19]:
def count_parameters(model):
    """Return the total number of elements across the model's trainable parameters.

    Parameters with `requires_grad` set to False are not counted.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total


print(f"The model has {count_parameters(model)} parameters")

The model has 31251 parameters


In the example below, `ArrayModel` is a Pydantic model that expects a single attribute `array` which should be a numpy array. The `check_dimension` method is a validator for `array` that checks if `array` is 2D and if the size of its second dimension is 3. If not, it raises a `ValueError`.

You can use a similar approach to validate PyTorch tensors or other data types. However, keep in mind that this adds an extra layer of complexity to your code, and it might be simpler and more efficient to write custom validation functions, especially for complex data types or validation rules.

I hope this helps! Let me know if you have any other questions. 😊

In [None]:
from pydantic import BaseModel, validator

class ArrayModel(BaseModel):
    """Pydantic model holding a single 2D numpy array with exactly three columns."""

    array: np.ndarray

    class Config:
        # numpy arrays are not a type pydantic can validate by itself; without
        # this opt-in pydantic rejects the `np.ndarray` field when the class is
        # defined.
        arbitrary_types_allowed = True

    @validator('array')
    def check_dimension(cls, array):
        """Reject arrays that are not 2D or whose second dimension is not 3.

        Returns the array unchanged when it passes the check.

        Raises:
        ValueError: If the array has the wrong number of dimensions or columns.
        """
        if array.ndim != 2 or array.shape[1] != 3:
            raise ValueError('array must be 2D and the size of the second dimension must be 3')
        return array