In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import pickle
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score,mean_absolute_percentage_error
from tqdm import tqdm


In [2]:
with open('inputs_36.pkl', 'rb') as f, open('targets_36.pkl','rb') as t:
    inputs = pickle.load(f)
    targets = pickle.load(t)
# Shuffle before splitting (preserves alignment of X and y)
X_shuffled, y_shuffled = shuffle(inputs, targets, random_state=42)

# Train-test split
X_train, X_test, Y_train, Y_test = train_test_split(
    X_shuffled, y_shuffled, test_size=0.2, random_state=42
)
# X_train, X_test, y_train, y_test = train_test_split(
#     X_train, y_train, test_size=0.2, random_state=42
# )


In [3]:
# with open("inputs_small_train_36.pkl", "wb") as f:
#     pickle.dump(X_train, f)

# with open("inputs_small_test_36.pkl", "wb") as f:
#     pickle.dump(X_test, f)
# # Save targets
# with open("targets_small_train_36.pkl", "wb") as f:
#     pickle.dump(y_train, f)
# with open("targets_small_test_36.pkl", "wb") as f:
#     pickle.dump(y_test, f)

In [4]:
# with open("inputs_small_train_36.pkl", "rb") as f:
#     X_train = pickle.load(f)

# with open("inputs_small_test_36.pkl", "rb") as f:
#     X_test = pickle.load(f)
# # Save targets
# with open("targets_small_train_36.pkl", "rb") as f:
#     Y_train = pickle.load(f)
# with open("targets_small_test_36.pkl", "rb") as f:
#     Y_test = pickle.load(f)

In [5]:
# Indexes of features to normalize
norm_idx = [0, 1, 2, 3]  # NDVI, NDBI, Albedo, agg_month (example)

# Flatten for scaling
X_train_flat = X_train.reshape(-1, X_train.shape[2])
X_test_flat = X_test.reshape(-1, X_test.shape[2])

# Initialize a new array to hold the scaled data
X_train_scaled = X_train_flat.copy()
X_test_scaled = X_test_flat.copy()

scaler = StandardScaler()

# Fit and transform only the continuous columns
X_train_scaled[:, norm_idx] = scaler.fit_transform(X_train_flat[:, norm_idx])
X_test_scaled[:, norm_idx] = scaler.transform(X_test_flat[:, norm_idx])

# Reshape back to 3D
X_train = X_train_scaled.reshape(X_train.shape)
X_test = X_test_scaled.reshape(X_test.shape)


In [6]:
X_train.shape

(487502, 12, 17)

In [7]:
X_train

array([[[ 8.62006128e-01,  1.30156979e-01,  1.25853792e-01, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [ 2.92296588e-01, -4.58048843e-02,  1.36475578e-01, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [ 8.83926079e-02, -2.00183734e-01,  1.23975918e-01, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        ...,
        [-3.64894867e+00, -3.26838464e-01,  2.98603754e-02, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [ 3.98082346e-01,  3.98319028e-02,  1.54238477e-01, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [ 5.81018887e-02, -2.56466150e-01,  1.08231835e-01, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00]],

       [[ 1.27319708e-01,  3.59979928e-01, -1.32668391e-01, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [ 1.33512661e-01,  4.29130048e-01, -1.71790630e-01, ...,
          0.00000000e+00,  0.00000000e

In [8]:
Y_train.shape

(487502,)

In [9]:
train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32),
                              torch.tensor(Y_train, dtype=torch.float32))

test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32),
                             torch.tensor(Y_test, dtype=torch.float32))

train_loader = DataLoader(train_dataset, batch_size=1024, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1024, shuffle=False)


In [10]:
device = torch.device("cuda")
device

device(type='cuda')

In [11]:
class LSTMPredictor(nn.Module):
    def __init__(self, input_size, hidden_size=2048, num_layers=4, dropout=0.3):
        super(LSTMPredictor, self).__init__()

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout if num_layers > 1 else 0,
            batch_first=True
        )

        self.fc = nn.Linear(hidden_size, 1)  # Output = scalar

    def forward(self, x):
        # x shape: (batch_size, seq_len, input_size)
        lstm_out, _ = self.lstm(x)  # lstm_out: (batch_size, seq_len, hidden_size)
        last_time_step = lstm_out[:, -1, :]  # take the last time step's output
        out = self.fc(last_time_step)  # (batch_size, 1)
        return out.squeeze(1)  # final shape: (batch_size,)
    


In [None]:
input_size = X_train.shape[2]  # num_features per timestep
model = LSTMPredictor(input_size=input_size)
model.to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Training loop
num_epochs = 100
train_losses = []
for epoch in tqdm(range(num_epochs)):
    model.train()

    for batch_x, batch_y in train_loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)
        #print(batch_x.shape)
        #print(batch_y.shape)
        # Forward
        preds = model(batch_x)  # shape: (batch_size,)
        #print(preds.shape)
        loss = criterion(preds, batch_y)
        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {loss.item()}")
    train_losses.append(loss.item())
plt.figure(figsize=(8, 5))
plt.plot(np.arange(num_epochs),train_losses, marker='o')
plt.title("Training Loss Curve")
plt.xlabel("Epoch")
plt.ylabel("MSE Loss")
plt.grid(True)
plt.tight_layout()
plt.show()

  1%|          | 1/100 [00:45<1:15:35, 45.81s/it]

Epoch 1/100 - Train Loss: 2489.421630859375


  2%|▏         | 2/100 [01:31<1:14:56, 45.88s/it]

Epoch 2/100 - Train Loss: 3009.531005859375


  3%|▎         | 3/100 [02:17<1:14:08, 45.86s/it]

Epoch 3/100 - Train Loss: 719.9663696289062


  4%|▍         | 4/100 [03:03<1:13:25, 45.89s/it]

Epoch 4/100 - Train Loss: 1069.300048828125


  5%|▌         | 5/100 [03:49<1:12:38, 45.88s/it]

Epoch 5/100 - Train Loss: 1560.80859375


  6%|▌         | 6/100 [04:35<1:11:52, 45.87s/it]

Epoch 6/100 - Train Loss: 1001.3810424804688


  7%|▋         | 7/100 [05:21<1:11:09, 45.91s/it]

Epoch 7/100 - Train Loss: 1595.3209228515625


  8%|▊         | 8/100 [06:07<1:10:22, 45.90s/it]

Epoch 8/100 - Train Loss: 1303.4228515625


  9%|▉         | 9/100 [06:53<1:09:38, 45.92s/it]

Epoch 9/100 - Train Loss: 1014.8233032226562


 10%|█         | 10/100 [07:38<1:08:51, 45.91s/it]

Epoch 10/100 - Train Loss: 2761.938720703125


 11%|█         | 11/100 [08:24<1:08:06, 45.91s/it]

Epoch 11/100 - Train Loss: 1324.4853515625


 12%|█▏        | 12/100 [09:10<1:07:18, 45.90s/it]

Epoch 12/100 - Train Loss: 1025.6209716796875


 13%|█▎        | 13/100 [09:56<1:06:33, 45.90s/it]

Epoch 13/100 - Train Loss: 1536.7642822265625


 14%|█▍        | 14/100 [10:42<1:05:46, 45.89s/it]

Epoch 14/100 - Train Loss: 1687.6951904296875


 15%|█▌        | 15/100 [11:28<1:04:59, 45.88s/it]

Epoch 15/100 - Train Loss: 3043.087890625


 16%|█▌        | 16/100 [12:14<1:04:17, 45.92s/it]

Epoch 16/100 - Train Loss: 2129.322509765625


 17%|█▋        | 17/100 [13:00<1:03:30, 45.91s/it]

Epoch 17/100 - Train Loss: 1009.5140380859375


 18%|█▊        | 18/100 [13:46<1:02:44, 45.91s/it]

Epoch 18/100 - Train Loss: 2984.508544921875


 19%|█▉        | 19/100 [14:32<1:01:58, 45.91s/it]

Epoch 19/100 - Train Loss: 1895.596923828125


 20%|██        | 20/100 [15:17<1:01:13, 45.91s/it]

Epoch 20/100 - Train Loss: 1823.4853515625


 21%|██        | 21/100 [16:03<1:00:26, 45.91s/it]

Epoch 21/100 - Train Loss: 1339.442138671875


 22%|██▏       | 22/100 [16:49<59:41, 45.92s/it]  

Epoch 22/100 - Train Loss: 773.8396606445312


 23%|██▎       | 23/100 [17:35<58:54, 45.90s/it]

Epoch 23/100 - Train Loss: 3260.3798828125


 24%|██▍       | 24/100 [18:21<58:09, 45.91s/it]

Epoch 24/100 - Train Loss: 871.0822143554688


 25%|██▌       | 25/100 [19:07<57:23, 45.91s/it]

Epoch 25/100 - Train Loss: 749.8776245117188


 26%|██▌       | 26/100 [19:53<56:36, 45.90s/it]

Epoch 26/100 - Train Loss: 1606.2308349609375


 27%|██▋       | 27/100 [20:39<55:51, 45.91s/it]

Epoch 27/100 - Train Loss: 2152.484375


 28%|██▊       | 28/100 [21:25<55:04, 45.89s/it]

Epoch 28/100 - Train Loss: 1041.021484375


 29%|██▉       | 29/100 [22:11<54:19, 45.91s/it]

Epoch 29/100 - Train Loss: 2197.90234375


 30%|███       | 30/100 [22:57<53:33, 45.90s/it]

Epoch 30/100 - Train Loss: 1856.64208984375


 31%|███       | 31/100 [23:42<52:48, 45.91s/it]

Epoch 31/100 - Train Loss: 1310.9093017578125


 32%|███▏      | 32/100 [24:28<52:01, 45.91s/it]

Epoch 32/100 - Train Loss: 1315.8045654296875


 33%|███▎      | 33/100 [25:14<51:16, 45.92s/it]

Epoch 33/100 - Train Loss: 1873.694580078125


 34%|███▍      | 34/100 [26:00<50:29, 45.90s/it]

Epoch 34/100 - Train Loss: 768.3882446289062


 35%|███▌      | 35/100 [26:46<49:43, 45.90s/it]

Epoch 35/100 - Train Loss: 739.7672729492188


 36%|███▌      | 36/100 [27:32<48:58, 45.92s/it]

Epoch 36/100 - Train Loss: 2133.597900390625


 37%|███▋      | 37/100 [28:18<48:12, 45.92s/it]

Epoch 37/100 - Train Loss: 1057.3016357421875


 38%|███▊      | 38/100 [29:04<47:27, 45.93s/it]

Epoch 38/100 - Train Loss: 2458.69287109375


 39%|███▉      | 39/100 [29:50<46:41, 45.93s/it]

Epoch 39/100 - Train Loss: 1057.7340087890625


 40%|████      | 40/100 [30:36<45:56, 45.95s/it]

Epoch 40/100 - Train Loss: 1875.7520751953125


 41%|████      | 41/100 [31:22<45:09, 45.93s/it]

Epoch 41/100 - Train Loss: 2472.83984375


 42%|████▏     | 42/100 [32:08<44:23, 45.93s/it]

Epoch 42/100 - Train Loss: 1289.814208984375


 43%|████▎     | 43/100 [32:54<43:37, 45.92s/it]

Epoch 43/100 - Train Loss: 1864.4971923828125


 44%|████▍     | 44/100 [33:39<42:51, 45.92s/it]

Epoch 44/100 - Train Loss: 1354.6453857421875


 45%|████▌     | 45/100 [34:25<42:04, 45.90s/it]

Epoch 45/100 - Train Loss: 1302.468505859375


 46%|████▌     | 46/100 [35:11<41:17, 45.89s/it]

Epoch 46/100 - Train Loss: 734.849365234375


 47%|████▋     | 47/100 [35:57<40:32, 45.91s/it]

Epoch 47/100 - Train Loss: 757.2810668945312


 48%|████▊     | 48/100 [36:43<39:46, 45.90s/it]

Epoch 48/100 - Train Loss: 2157.981201171875


 49%|████▉     | 49/100 [37:29<39:01, 45.92s/it]

Epoch 49/100 - Train Loss: 1855.9451904296875


 50%|█████     | 50/100 [38:15<38:14, 45.90s/it]

Epoch 50/100 - Train Loss: 1860.9591064453125


 51%|█████     | 51/100 [39:01<37:29, 45.91s/it]

Epoch 51/100 - Train Loss: 1009.1152954101562


 52%|█████▏    | 52/100 [39:47<36:42, 45.90s/it]

Epoch 52/100 - Train Loss: 1372.718994140625


 53%|█████▎    | 53/100 [40:33<35:57, 45.90s/it]

Epoch 53/100 - Train Loss: 1560.7982177734375


 54%|█████▍    | 54/100 [41:18<35:10, 45.89s/it]

Epoch 54/100 - Train Loss: 2242.955078125


 55%|█████▌    | 55/100 [42:04<34:24, 45.87s/it]

Epoch 55/100 - Train Loss: 2133.283935546875


 56%|█████▌    | 56/100 [42:50<33:38, 45.88s/it]

Epoch 56/100 - Train Loss: 765.3534545898438


 57%|█████▋    | 57/100 [43:36<32:52, 45.86s/it]

Epoch 57/100 - Train Loss: 1635.3094482421875


 58%|█████▊    | 58/100 [44:22<32:06, 45.88s/it]

Epoch 58/100 - Train Loss: 759.9051513671875


 59%|█████▉    | 59/100 [45:08<31:20, 45.86s/it]

Epoch 59/100 - Train Loss: 1589.9180908203125


 60%|██████    | 60/100 [45:54<30:34, 45.87s/it]

Epoch 60/100 - Train Loss: 1947.3414306640625


 61%|██████    | 61/100 [46:39<29:48, 45.85s/it]

Epoch 61/100 - Train Loss: 2380.728515625


 62%|██████▏   | 62/100 [47:25<29:02, 45.87s/it]

Epoch 62/100 - Train Loss: 196.65457153320312


 63%|██████▎   | 63/100 [48:11<28:16, 45.85s/it]

Epoch 63/100 - Train Loss: 1597.57958984375


 64%|██████▍   | 64/100 [48:57<27:29, 45.83s/it]

Epoch 64/100 - Train Loss: 2757.793212890625


 65%|██████▌   | 65/100 [49:43<26:44, 45.85s/it]

Epoch 65/100 - Train Loss: 1007.7390747070312


 66%|██████▌   | 66/100 [50:29<25:58, 45.84s/it]

Epoch 66/100 - Train Loss: 482.7400817871094


 67%|██████▋   | 67/100 [51:15<25:13, 45.86s/it]

Epoch 67/100 - Train Loss: 1298.12841796875


 68%|██████▊   | 68/100 [52:00<24:27, 45.85s/it]

Epoch 68/100 - Train Loss: 495.72406005859375


 69%|██████▉   | 69/100 [52:46<23:41, 45.87s/it]

Epoch 69/100 - Train Loss: 1314.30224609375


 70%|███████   | 70/100 [53:32<22:55, 45.86s/it]

Epoch 70/100 - Train Loss: 1334.5040283203125


 71%|███████   | 71/100 [54:18<22:10, 45.87s/it]

Epoch 71/100 - Train Loss: 1327.9912109375


 72%|███████▏  | 72/100 [55:04<21:24, 45.86s/it]

Epoch 72/100 - Train Loss: 1036.989013671875


 73%|███████▎  | 73/100 [55:50<20:38, 45.87s/it]

Epoch 73/100 - Train Loss: 1888.51123046875


 74%|███████▍  | 74/100 [56:36<19:52, 45.87s/it]

Epoch 74/100 - Train Loss: 1840.7515869140625


 75%|███████▌  | 75/100 [57:21<19:06, 45.86s/it]

Epoch 75/100 - Train Loss: 1875.911865234375


 76%|███████▌  | 76/100 [58:07<18:20, 45.86s/it]

Epoch 76/100 - Train Loss: 2704.072998046875


 77%|███████▋  | 77/100 [58:53<17:34, 45.85s/it]

Epoch 77/100 - Train Loss: 1839.97119140625


In [None]:
model.eval()
y_true = []
y_pred = []

with torch.no_grad():
    for batch_x, batch_y in test_loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        preds = model(batch_x)
        
        y_true.extend(batch_y.cpu().numpy())
        y_pred.extend(preds.cpu().numpy())

# Convert to numpy arrays
y_true = np.array(y_true)
y_pred = np.array(y_pred)

# If you normalized y, unnormalize here
# y_pred = y_pred * y_std + y_mean
# y_true = y_true * y_std + y_mean

# Calculate metrics
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)
mape=mean_absolute_percentage_error(y_true, y_pred)
print(f"Test MSE:  {mse:.4f}")
print(f"Test RMSE: {rmse:.4f}")
print(f"Test MAE:  {mae:.4f}")
print(f"Test R²:   {r2:.4f}")
print(f"Test MAPE:   {mape:.4f}")

Test MSE:  383.2466
Test RMSE: 19.5767
Test MAE:  6.5344
Test R²:   0.7633
Test MAPE:   0.2249
