April 09, 2025

First version of LSTM model for integration testing

Inputs: 60 day timeseries of Tmean, Precip, solar Radiation
Output: 5 day LAI forecast



In [14]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import pandas as pd


In [15]:
#dummy data

# Create a date range from 1982-01-01 to 2020-12-16
date_range = pd.date_range(start='1982-01-01', end='2020-12-16', freq='D')

# Create dummy data for each column
np.random.seed(42)  # for reproducibility
n = len(date_range)

df_dummy = pd.DataFrame({
    'time': date_range,
    'LAI': np.random.rand(n) * 6,              # Leaf Area Index typically ranges from 0 to 6
    'temp': np.random.rand(n) * 30,            # Temperature in °C
    'precip': np.random.rand(n) * 10,          # Precipitation in mm
    'rad': np.random.rand(n) * 500             # Solar radiation in W/m²
})

df_dummy['year'] = df_dummy['time'].dt.year  # Add year for grouping



In [16]:
df_dummy

Unnamed: 0,time,LAI,temp,precip,rad,year
0,1982-01-01,2.247241,3.046698,4.179377,99.763683,1982
1,1982-01-02,5.704286,21.146907,4.057106,106.824625,1982
2,1982-01-03,4.391964,6.147481,0.866202,115.385113,1982
3,1982-01-04,3.591951,26.841423,0.035664,150.339348,1982
4,1982-01-05,0.936112,27.858190,8.984232,62.793034,1982
...,...,...,...,...,...,...
14225,2020-12-12,3.236606,26.186379,1.614705,121.952545,2020
14226,2020-12-13,3.106444,2.580546,2.727018,50.337004,2020
14227,2020-12-14,0.052649,0.912856,0.788239,404.974100,2020
14228,2020-12-15,0.010349,18.194335,1.356495,492.313206,2020


# Data processing 

In [17]:
# Modify the sequence function to offset the start of the sequences by 5 days for each new year
def create_sequences_by_year_offset(df, input_window=60, output_window=5, offset_per_year=5):
    X, y = [], []
    for i, (year, group) in enumerate(df.groupby('year')):
        group = group.reset_index(drop=True)
        features = group[['temp', 'precip', 'rad']].values
        lai = group['LAI'].values

        # Apply offset
        start_index = i * offset_per_year
        max_index = len(group) - input_window - output_window + 1

        for j in range(start_index, max_index):
            seq_x = np.hstack([features[j:j+input_window], lai[j:j+input_window].reshape(-1, 1)])
            seq_y = lai[j+input_window:j+input_window+output_window]
            X.append(seq_x)
            y.append(seq_y)
    return np.array(X), np.array(y)

In [18]:
#example usage 
X_offset, y_offset = create_sequences_by_year_offset(df_dummy)

# LSTM Model 

In [19]:
class LSTM_LAI(nn.Module):
    def __init__(self, input_size=4, hidden_size=64, num_layers=2, output_size=5):
        super(LSTM_LAI, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        
    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])  # use last timestep
        return out


# training loop 

In [20]:
# Convert to tensors
X_tensor = torch.tensor(X_offset, dtype=torch.float32)
y_tensor = torch.tensor(y_offset, dtype=torch.float32)

dataset = TensorDataset(X_tensor, y_tensor)
loader = DataLoader(dataset, batch_size=32, shuffle=True)

# Initialize
model = LSTM_LAI()
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Train
for epoch in range(2):
    for xb, yb in loader:
        pred = model(xb)
        loss = loss_fn(pred, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")


Epoch 1, Loss: 2.9237
Epoch 2, Loss: 2.8737


# Prediction 

In [40]:
#dummy data

# Create a date range from 1982-01-01 to 2020-12-16
date_range_test = pd.date_range(start='2021-01-01', end='2024-12-16', freq='D')

# Create dummy data for each column
np.random.seed(10)  # for reproducibility
n_test = len(date_range_test)

df_dummy_test = pd.DataFrame({
    'time': date_range_test,
    'LAI': np.random.rand(n_test) * 6,              # Leaf Area Index typically ranges from 0 to 6
    'temp': np.random.rand(n_test) * 30,            # Temperature in °C
    'precip': np.random.rand(n_test) * 10,          # Precipitation in mm
    'rad': np.random.rand(n_test) * 500             # Solar radiation in W/m²
})

df_dummy_test['year'] = df_dummy_test['time'].dt.year  # Add year for grouping

X_test, y_test = create_sequences_by_year_offset(df_dummy_test)

# Convert to tensors
X_tensor_test = torch.tensor(X_test, dtype=torch.float32)
y_tensor_test = torch.tensor(y_test, dtype=torch.float32)

model.eval()
with torch.no_grad():
    lai_pred = model(X_tensor_test).numpy()


In [41]:
# Convert to tensors
X_tensor_test = torch.tensor(X_test, dtype=torch.float32)
y_tensor_test = torch.tensor(y_test, dtype=torch.float32)


In [42]:
model.eval()
with torch.no_grad():
    lai_pred = model(X_tensor_test).numpy()


In [51]:
df_pred = pd.DataFrame({
    'true_LAI': y_test.flatten(),
    'predicted_LAI': lai_pred.flatten(),
})

In [52]:
df_pred

Unnamed: 0,true_LAI,predicted_LAI
0,3.584230,2.902947
1,5.416991,2.842479
2,3.207348,3.097130
3,3.541208,2.876272
4,0.235691,2.936420
...,...,...
5795,0.275547,2.901943
5796,5.848299,2.841746
5797,1.140387,3.094030
5798,1.252113,2.875371


# Exporting the LSTM model 

In [55]:
#save model as a "TorchScript" like this:
model_scripted = torch.jit.script(model)
model_scripted.save("../models/example_LSTM_model_v1.pt")