In [29]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [30]:
p_gwl: Path = Path("../data/AquiMod_simobs_Gretna.csv")
p_met: Path = Path("../data/ukcp18_simobs_Gretna.csv")
df_gwl: pd.DataFrame = pd.read_csv(p_gwl)
df_met: pd.DataFrame = pd.read_csv(p_met)
df_data: pd.DataFrame = pd.merge(left=df_met, right=df_gwl, on=["Borehole", "Model", "Date"], how="inner").dropna()

In [31]:
df_data

Unnamed: 0,Borehole,Model,Date,precipwsnow,PET,Sim,Obs
11419,Gretna,AquiMod,07/04/1993,0.096710,1.530000,39.9447,40.084
11420,Gretna,AquiMod,08/04/1993,22.228661,1.530000,39.9812,40.082
11421,Gretna,AquiMod,09/04/1993,9.274128,1.530000,40.0087,40.106
11422,Gretna,AquiMod,10/04/1993,0.089421,1.530000,40.0106,40.121
11423,Gretna,AquiMod,11/04/1993,1.071286,1.530000,40.0064,40.135
...,...,...,...,...,...,...,...
20814,Gretna,AquiMod,27/12/2018,0.153541,0.248387,39.9721,39.959
20815,Gretna,AquiMod,28/12/2018,1.296672,0.248387,39.9695,39.953
20816,Gretna,AquiMod,29/12/2018,1.978836,0.248387,39.9697,39.950
20817,Gretna,AquiMod,30/12/2018,0.029849,0.248387,39.9671,39.941


In [32]:
precip: np.ndarray = df_data["precipwsnow"].values
pet: np.ndarray = df_data["PET"].values
gwl: np.ndarray = df_data["Obs"].values

In [33]:
# Concatenate the features
features_arr: np.ndarray = np.column_stack((precip, pet))
features_arr

array([[ 0.09671013,  1.53      ],
       [22.22866094,  1.53      ],
       [ 9.27412802,  1.53      ],
       ...,
       [ 1.97883607,  0.2483871 ],
       [ 0.02984906,  0.2483871 ],
       [ 0.1009835 ,  0.2483871 ]])

In [34]:
# Normalize the features
scaler: MinMaxScaler = MinMaxScaler(feature_range=(-1, 1))
features_scaled_arr: np.ndarray = scaler.fit_transform(features_arr)
features_scaled_arr

array([[-0.99757221, -0.09694581],
       [-0.4419762 , -0.09694581],
       [-0.76718417, -0.09694581],
       ...,
       [-0.9503237 , -0.87980296],
       [-0.99925068, -0.87980296],
       [-0.99746493, -0.87980296]])

In [35]:
# Convert to PyTorch tensors
# Note that the target variable, GWL, does not need rescaling
features_tensor: torch.Tensor = torch.from_numpy(features_scaled_arr).float()
gwl_tensor: torch.Tensor = torch.from_numpy(gwl).float()

In [39]:
# Split into training and test sets
train_size = int(len(features_tensor) * 0.8)
test_size = len(features_tensor) - train_size

features_train = features_tensor[:train_size]
features_test = features_tensor[train_size:]
groundwater_level_train  = gwl_tensor[:train_size]
groundwater_level_test = gwl_tensor[train_size:]

**CAUTION** the following code block is taken from another video tutorial

When you initialize an instance of TimeSeriesDataset, you pass in X and y. Here, X is expected to be a 2D array-like object where each row is a separate sample and each column is a separate feature. So, if you have multiple features, X would have multiple columns.

The __getitem__ method returns the i-th sample and its corresponding target value. This will be a tuple, where the first element is a 1D array (the feature vector for the i-th sample) and the second element is the target value.

When you use this dataset to train your LSTM, each sample (which could contain multiple features) will be an input to the LSTM. Just ensure that the input_size parameter of your LSTM matches the number of features in your data. 😊

In [None]:
class TimeSeriesDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        return self.X[i], self.y[i]

# Work on this tomorrow
"""
train_dataset = TimeSeriesDataset(X_train, y_train)
test_dataset = TimeSeriesDataset(X_test, y_test)

batch_size = 16

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
"""

`nn.Linear` is a class in PyTorch that applies a linear transformation to the incoming data. It's often referred to as a fully connected layer or a dense layer in neural networks. The transformation it applies is: `y = xA^T + b`, where `x` is the input, `A` is the weight matrix, `b` is the bias, and `y` is the output.

In your LSTM model, the `nn.Linear` layer is used at the end of the network. The reason for this is to transform the output of the LSTM layers to the desired output shape. 

In the case of your code, the LSTM layers output a tensor of shape `(batch_size, seq_length, hidden_size)`. The `nn.Linear` layer transforms this to `(batch_size, seq_length, output_size)`. In your specific case, `output_size` is 1 because you're doing a regression task (predicting groundwater levels), so you want a single continuous value as output for each sequence in the batch.

So, the LSTM layers learn the temporal dynamics of the data, and the final linear layer maps these learned features to the target variable (groundwater levels in your case). This is a common architecture in many tasks involving sequence data. 😊

In the context of Long Short-Term Memory (LSTM) networks, `h` and `c` represent the hidden state and the cell state, respectively.

- **Hidden State (`h`)**: This is the output of the LSTM unit. It's a function of the current input and the previous cell state. The hidden state can be used for predictions, and is also passed to the LSTM unit at the next time step.

- **Cell State (`c`)**: This is the "memory" of the LSTM unit. It stores long-term information. The cell state is updated at each time step by removing (forgetting) irrelevant parts and adding (storing) relevant parts of the current input and the previous hidden state.

The ability to update and manipulate these states allows the LSTM to learn and remember over long sequences, and to handle issues like vanishing and exploding gradients that can occur in traditional recurrent neural networks (RNNs). 😊

The batch_first argument in nn.LSTM is a boolean flag that changes the expected input format.

If batch_first is set to False (which is the default), the expected input shape is (seq_len, batch, input_size). Here, seq_len is the length of the sequence, batch is the batch size, and input_size is the number of features.

If batch_first is set to True, the expected input shape is (batch, seq_len, input_size). This is often more convenient and matches the ordering used by other parts of PyTorch, like the nn.Linear layer.

In your code, batch_first=True is used, which means your LSTM expects input tensors where the batch size is the first dimension. 😊

The line `self.lstm(x, (h0, c0))` is where the input tensor `x` and the initial hidden and cell states `(h0, c0)` are passed into the LSTM layer.

Here's a breakdown:

- `x` is the input tensor, which should have the shape `(batch_size, seq_length, input_size)` if `batch_first=True`, or `(seq_length, batch_size, input_size)` if `batch_first=False`.

- `(h0, c0)` are the initial hidden and cell states for the LSTM. `h0` and `c0` are both tensors of shape `(num_layers, batch_size, hidden_size)`. They are usually initialized to zeros, but can be set to other values for specific use cases.

The `self.lstm(x, (h0, c0))` call returns two outputs:

1. `out`: A tensor containing the output features from the LSTM. If `batch_first=True`, it will have the shape `(batch_size, seq_length, hidden_size)`. If `batch_first=False`, it will have the shape `(seq_length, batch_size, hidden_size)`.

2. `(hn, cn)`: Tensors containing the hidden and cell states for `t = seq_len`. These are useful when the LSTM is part of a larger network and its final states are needed for further computation.

In your code, the `out` tensor is passed to a fully connected layer (`self.fc`) to get the final output of the model, while `(hn, cn)` are not used. 😊

In [44]:
# Define the LSTM model
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

Continue from the above code onwards

In [45]:
# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [46]:
# Initialize the model, loss function, and optimizer
model = LSTM(input_size=2, hidden_size=50, num_layers=2, output_size=1).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [47]:
# Move data to the device
features_train = features_train.to(device)
groundwater_level_train = groundwater_level_train.to(device)
features_test = features_test.to(device)
groundwater_level_test = groundwater_level_test.to(device)

I need to convert the tensors into a dataset and data loader, I think

In [48]:
# Training loop
for epoch in range(100):  # 100 epochs
    model.train()
    optimizer.zero_grad()
    outputs = model(features_train)
    loss = criterion(outputs, groundwater_level_train)
    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print(f"Epoch: {epoch}, Loss: {loss.item()}")

RuntimeError: For unbatched 2-D input, hx and cx should also be 2-D but got (3-D, 3-D) tensors

In [None]:
# Testing
model.eval()
with torch.no_grad():
    predictions = model(features_test)

In [None]:
# Print the first 5 predictions
print(predictions[:5])

In [None]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


print(f"The model has {count_parameters(model)} parameters")