In [1]:
import pandas as pd
import numpy as np

import rasterio
from skimage.transform import resize
from skimage.transform import rotate
import os

import torch
from torch.utils.data import Dataset, DataLoader

import torch.nn as nn
import torch.nn.functional as F

from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

from sklearn.model_selection import train_test_split

from datetime import timedelta
from skimage.draw import polygon
import matplotlib.pyplot as plt

from shapely.geometry import Polygon

#### Import Yield Data

In [2]:
from utils import process_yield_data
from pathlib import Path
YIELD_DATA_PATH = Path("./combined_yield_data.csv")
yield_data_weekly = process_yield_data(YIELD_DATA_PATH)

            Volume (Pounds)  Cumulative Volumne (Pounds)  Pounds/Acre
Date                                                                 
2012-01-02          23400.0                      23400.0          2.0
2012-01-03          26064.0                      49464.0          3.0
2012-01-04          32382.0                      81846.0          3.0
2012-01-05          69804.0                     151650.0          7.0
2012-01-06          18000.0                     169650.0          2.0

Number of Yield Data Points:  3970

Column Names: Index(['Volume (Pounds)', 'Cumulative Volumne (Pounds)', 'Pounds/Acre'], dtype='object')
Number of Yield Data Points: 2879
Yield data with time features:
            Volume (Pounds)  Cumulative Volumne (Pounds)  Pounds/Acre  \
Date                                                                    
2012-03-04         525753.0                    1785843.0    18.333333   
2012-03-11        2949534.0                    4735377.0    51.666667   
2012-03-18   

#### Define the Model

In [3]:
target_shape = (512, 512)
device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
print(f"Using {device} device")

Using cuda device


In [4]:
class CNNFeatureExtractor(nn.Module):
    def __init__(self):
        super(CNNFeatureExtractor, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.conv4 = nn.Conv2d(128, 256, 3, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.5)
        self.flattened_size = self._get_conv_output((1, *target_shape))
        self.fc1 = nn.Linear(self.flattened_size, 512)

    def _get_conv_output(self, shape):
        x = torch.rand(1, *shape)
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        x = self.pool(F.relu(self.bn4(self.conv4(x))))
        n_size = x.view(1, -1).size(1)
        return n_size

    def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        x = self.pool(F.relu(self.bn4(self.conv4(x))))
        x = self.dropout(x)
        x = x.view(-1, self.flattened_size)
        x = F.relu(self.fc1(x))
        return x
    
class HybridModel(nn.Module):
    def __init__(self, cnn_feature_extractor, lstm_hidden_size=64, lstm_layers=1):
        super(HybridModel, self).__init__()
        self.cnn = cnn_feature_extractor
        self.lstm = nn.LSTM(input_size=512, hidden_size=lstm_hidden_size, num_layers=lstm_layers, batch_first=True)
        self.fc1 = nn.Linear(lstm_hidden_size + 6, 64)
        self.fc2 = nn.Linear(64, target_shape[0] * target_shape[1])  # Predict a value per pixel
        self.target_shape = target_shape

    def forward(self, x, time_features):
        batch_size, time_steps, C, H, W = x.size()
        c_in = x.view(batch_size * time_steps, C, H, W)
        c_out = self.cnn(c_in)
        r_in = c_out.view(batch_size, time_steps, -1)
        r_out, (h_n, c_n) = self.lstm(r_in)
        r_out = r_out[:, -1, :]
        x = torch.cat((r_out, time_features), dim=1)  # Concatenate LSTM output with time features
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        x = x.view(batch_size, *self.target_shape)  # Reshape to the target shape
        return x

#### Initialize Function

In [5]:
def weights_init(m):
    if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
        nn.init.kaiming_normal_(m.weight)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)

# Instantiate model with weight decay regularization
cnn_feature_extractor = CNNFeatureExtractor()
model = HybridModel(cnn_feature_extractor)
model.apply(weights_init)
model.to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

#### Training loop with early stopping

In [6]:
best_loss = float('inf')
patience = 3
trigger_times = 0
epochs = 50

# for epoch in range(epochs):
#     running_loss = 0.0
#     model.train()
#     for i, (inputs, labels, time_features) in enumerate(tqdm(train_loader)):
#         if device != "cpu":
#             inputs, labels, time_features = inputs.to(device), labels.to(device), time_features.to(device)
#         optimizer.zero_grad()
#         outputs = model(inputs, time_features)
#         labels = labels.unsqueeze(1).unsqueeze(2).expand(-1, target_shape[0], target_shape[1])
#         loss = criterion(outputs, labels)
#         loss.backward()
#         optimizer.step()
#         running_loss += loss.item()
#     epoch_loss = running_loss / len(train_loader)
#     scheduler.step()
#     print(f'Epoch {epoch+1}, Loss: {epoch_loss}')

#     # Early stopping
#     if epoch_loss < best_loss:
#         best_loss = epoch_loss
#         trigger_times = 0
#         torch.save(model.state_dict(), 'best_hybrid_model.pth')  # Save best model
#     else:
#         trigger_times += 1
#         if trigger_times >= patience:
#             print("Early stopping!")
#             break


# def train_model(model, optimizer, scheduler, criterion, train_loader, val_loader, epochs, device, patience=3):
#     best_loss = float('inf')
#     trigger_times = 0
    
#     for epoch in range(epochs):
#         model.train()
#         running_loss = 0.0
#         for inputs, labels, time_features in tqdm(train_loader):
#             inputs, labels, time_features = inputs.to(device), labels.to(device), time_features.to(device)
#             optimizer.zero_grad()
#             outputs = model(inputs, time_features)
#             labels_expanded = labels.unsqueeze(1).unsqueeze(2).expand(-1, target_shape[0], target_shape[1])
#             loss = criterion(outputs, labels_expanded)
#             loss.backward()
#             optimizer.step()
#             running_loss += loss.item()
        
#         epoch_loss = running_loss / len(train_loader)
#         scheduler.step()
        
#         val_loss = 0.0
#         model.eval()
#         with torch.no_grad():
#             for inputs, labels, time_features in val_loader:
#                 inputs, labels, time_features = inputs.to(device), labels.to(device), time_features.to(device)
#                 outputs = model(inputs, time_features)
#                 labels_expanded = labels.unsqueeze(1).unsqueeze(2).expand(-1, target_shape[0], target_shape[1])
#                 loss = criterion(outputs, labels_expanded)
#                 val_loss += loss.item()
        
#         val_loss /= len(val_loader)
#         print(f'Epoch {epoch+1}, Training Loss: {epoch_loss}, Validation Loss: {val_loss}')
        
#         # Early stopping
#         if val_loss < best_loss:
#             best_loss = val_loss
#             trigger_times = 0
#             torch.save(model.state_dict(), 'best_hybrid_model.pth')  # Save best model
#         else:
#             trigger_times += 1
#             if trigger_times >= patience:
#                 print("Early stopping!")
#                 break

# # Training and validation
# # model = HybridModel(CNNFeatureExtractor()).to(device)
# # criterion = nn.MSELoss()
# # optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
# # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

# # train_model(model, optimizer, scheduler, criterion, train_loader, val_loader, epochs=50, device=device)

# DIEGO: is everything above out-of-date and only the functions below are relevant?

### Functions for prediction

In [7]:
from inference_utils import (
    preprocess_image,
    compute_mean_std,
    load_evi_data_and_prepare_features,
    find_closest_date,
    find_closest_date_in_df,
    mask_evi_data,
    predict,
    predict_weekly_yield
)


evi_data_dir = "./landsat_evi_monterey_masked"
evi_data_dict, time_features, mean, std  = load_evi_data_and_prepare_features(evi_data_dir, time_index=yield_data_weekly.index, target_shape=target_shape)

In [8]:
# #TODO: Convert the pixel area to physical area (may require a dynamic conversion factor depending on the zoom from the UI)
# #TODO: Pass the target date (for prediction) from the UI to this script
                                                                                                                                                                                            
# # Example polygon coordinates (we need to replace these with actual coordinates from your UI) - this one is just a box
# polygon_coords = np.array([
#     [100, 100],
#     [100, 200],
#     [200, 200],
#     [200, 100],
#     [100, 100]
# ])

# # Calculate the area based on the polygon coordinates
# polygon = Polygon(polygon_coords)
# polygon_area = polygon.area 

# # Convert polygon area from pixel units to acres (using the LandSat Conversion Factor)
# conversion_factor = 30  # 1 pixel = 30m^2
# polygon_area_acres = polygon_area * conversion_factor

# # Start date for predictions
# start_date = pd.to_datetime("2022-07-01") # This is an example date; we need to take this from the UI

# # Load and preprocess the EVI data
# time_index = yield_data_weekly.index
# evi_data_dict, time_features_list, mean, std = load_evi_data_and_prepare_features(evi_data_dir, time_index, target_shape)

# # Generate weekly predictions
# dates, predicted_yields = predict_weekly_yield(evi_data_dict, yield_data_weekly, start_date, polygon_area_acres, mean, std, target_shape, model, device)

# # Convert predictions to a numpy array
# predicted_yields = np.array(predicted_yields).flatten()

### Plot the predictions

In [9]:
# # Plot the predicted yields as a trend line
# plt.figure(figsize=(10, 6))
# plt.plot(dates, predicted_yields, marker='o', linestyle='-', color='b')
# plt.xlabel('Date')
# plt.ylabel('Predicted Yield (Pounds)')
# plt.title(f'Predicted Weekly Yield for Selected Polygon Area (Starting {start_date})')
# plt.grid(True)
# plt.show()

### Model Evaluation (Basic Evaluation Metrics)

In [10]:
# # Function to evaluate the model on the test set
# def evaluate_model(model, test_loader, mean, std, target_shape, device):
#     model.eval()
#     all_true = []
#     all_pred = []
    
#     with torch.no_grad():
#         for inputs, labels, time_features in test_loader:
#             if device != "cpu":
#                 inputs, labels, time_features = inputs.to(device), labels.to(device), time_features.to(device)
            
#             outputs = model(inputs, time_features)
#             labels = labels.unsqueeze(1).unsqueeze(2).expand(-1, target_shape[0], target_shape[1])
            
#             all_true.append(labels.cpu().numpy())
#             all_pred.append(outputs.cpu().numpy())
    
#     all_true = np.concatenate(all_true).flatten()
#     all_pred = np.concatenate(all_pred).flatten()
    
#     mse = mean_squared_error(all_true, all_pred)
#     rmse = np.sqrt(mse)
#     mae = mean_absolute_error(all_true, all_pred)
#     r2 = r2_score(all_true, all_pred)
    
#     return mse, rmse, mae, r2

# # Evaluate the model on the test set
# mse, rmse, mae, r2 = evaluate_model(model, test_loader, mean, std, target_shape, device)

# print(f"Mean Squared Error (MSE): {mse}")
# print(f"Root Mean Squared Error (RMSE): {rmse}")
# print(f"Mean Absolute Error (MAE): {mae}")
# print(f"R-squared (R²): {r2}")

### Model Evaluation (Cross Validation)

In [18]:
# Cross-validation
tscv = TimeSeriesSplit(n_splits=5)

mse_scores = []
rmse_scores = []
mae_scores = []
r2_scores = []

def train_and_evaluate(model, train_loader, val_loader, optimizer, scheduler, criterion, epochs, device):
    best_loss = float('inf')
    patience = 3
    trigger_times = 0
    
    for epoch in range(epochs):
        running_loss = 0.0
        model.train()
        for inputs, labels, time_features in tqdm(train_loader):
            inputs, labels, time_features = inputs.to(device), labels.to(device), time_features.to(device)
            optimizer.zero_grad()
            outputs = model(inputs, time_features)
            labels = labels.unsqueeze(1).unsqueeze(2).expand(-1, target_shape[0], target_shape[1])
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        
        epoch_loss = running_loss / len(train_loader)
        scheduler.step()
        print(f'Epoch {epoch + 1}, Loss: {epoch_loss}')

        # Early stopping
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            trigger_times = 0
        else:
            trigger_times += 1
            if trigger_times >= patience:
                print("Early stopping!")
                break

    # Evaluate on validation set
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, labels, time_features in val_loader:
            inputs, labels, time_features = inputs.to(device), labels.to(device), time_features.to(device)
            outputs = model(inputs, time_features)
            labels = labels.unsqueeze(1).unsqueeze(2).expand(-1, target_shape[0], target_shape[1])
            loss = criterion(outputs, labels)
            val_loss += loss.item()
    
    val_loss /= len(val_loader)
    print(f'Validation Loss: {val_loss}')
    return val_loss

for fold, (train_index, val_index) in enumerate(tscv.split(yield_data_weekly)):
    print(f"Fold {fold + 1}")
    print(f"       Train Index = {train_index[0]}, ..., {train_index[-1]} len={len(train_index)}")
    print(f"       Valid Index = {val_index[0]}, ..., {val_index[-1]} len={len(val_index)}")

    train_dates = yield_data_weekly.index[train_index].intersection(evi_data_dict.keys())
    val_dates = yield_data_weekly.index[val_index].intersection(evi_data_dict.keys())
    print(f"       Train Dates = {train_dates[0]}, ..., {train_dates[-1]} len={len(train_dates)}")
    print(f"       Valid Dates = {val_dates[0]}, ..., {val_dates[-1]} len={len(val_dates)}")

    train_index = yield_data_weekly.index.get_indexer(train_dates)
    val_index = yield_data_weekly.index.get_indexer(val_dates)
    print(f"  (get)Train Index = {train_index[0]}, ..., {train_index[-1]} len={len(train_index)}")
    print(f"  (get)Valid Index = {val_index[0]}, ..., {val_index[-1]} len={len(val_index)}")

    evi_train = np.array([evi_data_dict[date] for date in train_dates])
    evi_val = np.array([evi_data_dict[date] for date in val_dates])
    print(f"  evi_train.shape  = {evi_train.shape}")
    print(f"  evi_val.shape    = {evi_val.shape}")

    time_features_train = yield_data_weekly.loc[train_dates][['month_sin', 'month_cos', 'day_of_year_sin', 'day_of_year_cos', 'Volume (Pounds)', 'Cumulative Volumne (Pounds)']].values
    time_features_val = yield_data_weekly.loc[val_dates][['month_sin', 'month_cos', 'day_of_year_sin', 'day_of_year_cos', 'Volume (Pounds)', 'Cumulative Volumne (Pounds)']].values
    print(f"  time_features_train.shape  = {time_features_train.shape}")
    print(f"  time_features_val.shape    = {time_features_val.shape}")
    labels_train = yield_data_weekly.loc[train_dates]['Volume (Pounds)'].values
    labels_val = yield_data_weekly.loc[val_dates]['Volume (Pounds)'].values
    print(f"  labels_train.shape  = {labels_train.shape}")
    print(f"  labels_val.shape    = {labels_val.shape}")

    evi_train = torch.tensor(evi_train, dtype=torch.float32).unsqueeze(1).unsqueeze(2).to(device)
    evi_val = torch.tensor(evi_val, dtype=torch.float32).unsqueeze(1).unsqueeze(2).to(device)
    print(f"  (torch)evi_train.shape  = {evi_train.shape}")
    print(f"  (torch)evi_val.shape    = {evi_val.shape}")
    time_features_train = torch.tensor(time_features_train, dtype=torch.float32).to(device)
    time_features_val = torch.tensor(time_features_val, dtype=torch.float32).to(device)
    labels_train = torch.tensor(labels_train, dtype=torch.float32).to(device)
    labels_val = torch.tensor(labels_val, dtype=torch.float32).to(device)

    train_loader = DataLoader(list(zip(evi_train, labels_train, time_features_train)), batch_size=2, shuffle=True)
    val_loader = DataLoader(list(zip(evi_val, labels_val, time_features_val)), batch_size=2, shuffle=False)

    model = HybridModel(CNNFeatureExtractor())
    model.apply(weights_init)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    criterion = nn.MSELoss()

    val_loss = train_and_evaluate(model, train_loader, val_loader, optimizer, scheduler, criterion, epochs, device)
    
    model.eval()
    with torch.no_grad():
        outputs_val = model(evi_val, time_features_val)

    outputs_val_flat = outputs_val.cpu().numpy().flatten()
    labels_val_expanded = labels_val.unsqueeze(1).unsqueeze(2).expand(-1, target_shape[0], target_shape[1])
    labels_val_flat = labels_val_expanded.cpu().numpy().flatten()

    mse = mean_squared_error(labels_val_flat, outputs_val_flat)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(labels_val_flat, outputs_val_flat)
    r2 = r2_score(labels_val_flat, outputs_val_flat)

    mse_scores.append(mse)
    rmse_scores.append(rmse)
    mae_scores.append(mae)
    r2_scores.append(r2)

print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Average MAE: {np.mean(mae_scores)}")
print(f"Average R-squared: {np.mean(r2_scores)}")



Fold 1
       Train Index = 0, ..., 110 len=111
       Valid Index = 111, ..., 216 len=106
       Train Dates = 2014-02-23 00:00:00, ..., 2014-02-23 00:00:00 len=1
       Valid Dates = 2016-04-17 00:00:00, ..., 2016-04-17 00:00:00 len=1
  (get)Train Index = 103, ..., 103 len=1
  (get)Valid Index = 215, ..., 215 len=1
  evi_train.shape  = (1, 512, 512)
  evi_val.shape    = (1, 512, 512)
  time_features_train.shape  = (1, 6)
  time_features_val.shape    = (1, 6)
  labels_train.shape  = (1,)
  labels_val.shape    = (1,)
  (torch)evi_train.shape  = torch.Size([1, 1, 1, 512, 512])
  (torch)evi_val.shape    = torch.Size([1, 1, 1, 512, 512])


100%|██████████| 1/1 [00:00<00:00, 20.94it/s]


Epoch 1, Loss: 0.13545666635036469


100%|██████████| 1/1 [00:00<00:00, 24.23it/s]


Epoch 2, Loss: 0.1347959339618683


100%|██████████| 1/1 [00:00<00:00, 23.50it/s]


Epoch 3, Loss: 0.11724184453487396


100%|██████████| 1/1 [00:00<00:00, 23.36it/s]


Epoch 4, Loss: 0.07898657023906708


100%|██████████| 1/1 [00:00<00:00, 22.57it/s]


Epoch 5, Loss: 0.08247167617082596


100%|██████████| 1/1 [00:00<00:00, 22.68it/s]


Epoch 6, Loss: 0.06390894949436188


100%|██████████| 1/1 [00:00<00:00, 23.15it/s]


Epoch 7, Loss: 0.05569910630583763


100%|██████████| 1/1 [00:00<00:00, 21.36it/s]


Epoch 8, Loss: 0.056405600160360336


100%|██████████| 1/1 [00:00<00:00, 22.42it/s]


Epoch 9, Loss: 0.053138814866542816


100%|██████████| 1/1 [00:00<00:00, 22.82it/s]


Epoch 10, Loss: 0.049059562385082245


100%|██████████| 1/1 [00:00<00:00, 22.06it/s]


Epoch 11, Loss: 0.04412923380732536


100%|██████████| 1/1 [00:00<00:00, 22.90it/s]


Epoch 12, Loss: 0.04986071586608887


100%|██████████| 1/1 [00:00<00:00, 22.93it/s]


Epoch 13, Loss: 0.04482012614607811


100%|██████████| 1/1 [00:00<00:00, 22.40it/s]


Epoch 14, Loss: 0.04446239769458771
Early stopping!
Validation Loss: 0.2333991825580597
Fold 2
       Train Index = 0, ..., 216 len=217
       Valid Index = 217, ..., 322 len=106
       Train Dates = 2014-02-23 00:00:00, ..., 2016-04-17 00:00:00 len=2
       Valid Dates = 2017-10-29 00:00:00, ..., 2017-10-29 00:00:00 len=1
  (get)Train Index = 103, ..., 215 len=2
  (get)Valid Index = 295, ..., 295 len=1
  evi_train.shape  = (2, 512, 512)
  evi_val.shape    = (1, 512, 512)
  time_features_train.shape  = (2, 6)
  time_features_val.shape    = (1, 6)
  labels_train.shape  = (2,)
  labels_val.shape    = (1,)
  (torch)evi_train.shape  = torch.Size([2, 1, 1, 512, 512])
  (torch)evi_val.shape    = torch.Size([1, 1, 1, 512, 512])


100%|██████████| 1/1 [00:00<00:00, 16.90it/s]


Epoch 1, Loss: 0.22057074308395386


100%|██████████| 1/1 [00:00<00:00, 17.82it/s]


Epoch 2, Loss: 0.28075557947158813


100%|██████████| 1/1 [00:00<00:00, 18.60it/s]


Epoch 3, Loss: 0.2492993175983429


100%|██████████| 1/1 [00:00<00:00, 19.12it/s]


Epoch 4, Loss: 0.2151544839143753


100%|██████████| 1/1 [00:00<00:00, 19.48it/s]


Epoch 5, Loss: 0.19372932612895966


100%|██████████| 1/1 [00:00<00:00, 19.71it/s]


Epoch 6, Loss: 0.17846587300300598


100%|██████████| 1/1 [00:00<00:00, 20.03it/s]


Epoch 7, Loss: 0.19508683681488037


100%|██████████| 1/1 [00:00<00:00, 19.32it/s]


Epoch 8, Loss: 0.1731199026107788


100%|██████████| 1/1 [00:00<00:00, 18.37it/s]


Epoch 9, Loss: 0.17069417238235474


100%|██████████| 1/1 [00:00<00:00, 17.76it/s]


Epoch 10, Loss: 0.1625077873468399


100%|██████████| 1/1 [00:00<00:00, 19.32it/s]


Epoch 11, Loss: 0.16995501518249512


100%|██████████| 1/1 [00:00<00:00, 19.06it/s]


Epoch 12, Loss: 0.16934813559055328


100%|██████████| 1/1 [00:00<00:00, 18.30it/s]


Epoch 13, Loss: 0.1687334030866623
Early stopping!
Validation Loss: 0.1641451120376587
Fold 3
       Train Index = 0, ..., 322 len=323
       Valid Index = 323, ..., 428 len=106
       Train Dates = 2014-02-23 00:00:00, ..., 2017-10-29 00:00:00 len=3
       Valid Dates = 2020-04-12 00:00:00, ..., 2020-04-12 00:00:00 len=1
  (get)Train Index = 103, ..., 295 len=3
  (get)Valid Index = 423, ..., 423 len=1
  evi_train.shape  = (3, 512, 512)
  evi_val.shape    = (1, 512, 512)
  time_features_train.shape  = (3, 6)
  time_features_val.shape    = (1, 6)
  labels_train.shape  = (3,)
  labels_val.shape    = (1,)
  (torch)evi_train.shape  = torch.Size([3, 1, 1, 512, 512])
  (torch)evi_val.shape    = torch.Size([1, 1, 1, 512, 512])


100%|██████████| 2/2 [00:00<00:00, 21.41it/s]


Epoch 1, Loss: 0.2611491531133652


100%|██████████| 2/2 [00:00<00:00, 23.43it/s]


Epoch 2, Loss: 0.2090604528784752


100%|██████████| 2/2 [00:00<00:00, 23.48it/s]


Epoch 3, Loss: 0.13958044722676277


100%|██████████| 2/2 [00:00<00:00, 23.76it/s]


Epoch 4, Loss: 0.137933898717165


100%|██████████| 2/2 [00:00<00:00, 23.38it/s]


Epoch 5, Loss: 0.14423464238643646


100%|██████████| 2/2 [00:00<00:00, 23.76it/s]


Epoch 6, Loss: 0.11506232991814613


100%|██████████| 2/2 [00:00<00:00, 23.03it/s]


Epoch 7, Loss: 0.08873882796615362


100%|██████████| 2/2 [00:00<00:00, 22.98it/s]


Epoch 8, Loss: 0.08751200418919325


100%|██████████| 2/2 [00:00<00:00, 23.83it/s]


Epoch 9, Loss: 0.08396967872977257


100%|██████████| 2/2 [00:00<00:00, 23.49it/s]


Epoch 10, Loss: 0.09809453040361404


100%|██████████| 2/2 [00:00<00:00, 23.33it/s]


Epoch 11, Loss: 0.11677070148289204


100%|██████████| 2/2 [00:00<00:00, 23.61it/s]


Epoch 12, Loss: 0.09645003080368042
Early stopping!
Validation Loss: 0.08816248178482056
Fold 4
       Train Index = 0, ..., 428 len=429
       Valid Index = 429, ..., 534 len=106
       Train Dates = 2014-02-23 00:00:00, ..., 2020-04-12 00:00:00 len=4
       Valid Dates = 2022-04-10 00:00:00, ..., 2022-04-10 00:00:00 len=1
  (get)Train Index = 103, ..., 423 len=4
  (get)Valid Index = 527, ..., 527 len=1
  evi_train.shape  = (4, 512, 512)
  evi_val.shape    = (1, 512, 512)
  time_features_train.shape  = (4, 6)
  time_features_val.shape    = (1, 6)
  labels_train.shape  = (4,)
  labels_val.shape    = (1,)
  (torch)evi_train.shape  = torch.Size([4, 1, 1, 512, 512])
  (torch)evi_val.shape    = torch.Size([1, 1, 1, 512, 512])


100%|██████████| 2/2 [00:00<00:00, 20.82it/s]


Epoch 1, Loss: 0.269936241209507


100%|██████████| 2/2 [00:00<00:00, 21.87it/s]


Epoch 2, Loss: 0.19428962469100952


100%|██████████| 2/2 [00:00<00:00, 21.70it/s]


Epoch 3, Loss: 0.16696243733167648


100%|██████████| 2/2 [00:00<00:00, 22.08it/s]


Epoch 4, Loss: 0.1566290333867073


100%|██████████| 2/2 [00:00<00:00, 22.10it/s]


Epoch 5, Loss: 0.1483641043305397


100%|██████████| 2/2 [00:00<00:00, 21.76it/s]


Epoch 6, Loss: 0.12761667370796204


100%|██████████| 2/2 [00:00<00:00, 21.49it/s]


Epoch 7, Loss: 0.1265462264418602


100%|██████████| 2/2 [00:00<00:00, 21.41it/s]


Epoch 8, Loss: 0.1067146398127079


100%|██████████| 2/2 [00:00<00:00, 21.47it/s]


Epoch 9, Loss: 0.09925145283341408


100%|██████████| 2/2 [00:00<00:00, 21.58it/s]


Epoch 10, Loss: 0.0929182767868042


100%|██████████| 2/2 [00:00<00:00, 22.09it/s]


Epoch 11, Loss: 0.08851020783185959


100%|██████████| 2/2 [00:00<00:00, 21.36it/s]


Epoch 12, Loss: 0.08798372000455856


100%|██████████| 2/2 [00:00<00:00, 21.92it/s]


Epoch 13, Loss: 0.08747614920139313


100%|██████████| 2/2 [00:00<00:00, 21.85it/s]


Epoch 14, Loss: 0.08700060658156872


100%|██████████| 2/2 [00:00<00:00, 21.39it/s]


Epoch 15, Loss: 0.08650781214237213


100%|██████████| 2/2 [00:00<00:00, 22.04it/s]


Epoch 16, Loss: 0.08600451052188873


100%|██████████| 2/2 [00:00<00:00, 21.50it/s]


Epoch 17, Loss: 0.0855204425752163


100%|██████████| 2/2 [00:00<00:00, 21.85it/s]


Epoch 18, Loss: 0.0850878581404686


100%|██████████| 2/2 [00:00<00:00, 21.64it/s]


Epoch 19, Loss: 0.08462583646178246


100%|██████████| 2/2 [00:00<00:00, 21.79it/s]


Epoch 20, Loss: 0.084206223487854


100%|██████████| 2/2 [00:00<00:00, 21.43it/s]


Epoch 21, Loss: 0.08387266099452972


100%|██████████| 2/2 [00:00<00:00, 21.75it/s]


Epoch 22, Loss: 0.08382625877857208


100%|██████████| 2/2 [00:00<00:00, 21.38it/s]


Epoch 23, Loss: 0.08378421887755394


100%|██████████| 2/2 [00:00<00:00, 21.74it/s]


Epoch 24, Loss: 0.08374135196208954


100%|██████████| 2/2 [00:00<00:00, 21.25it/s]


Epoch 25, Loss: 0.08369696885347366


100%|██████████| 2/2 [00:00<00:00, 21.27it/s]


Epoch 26, Loss: 0.08365502208471298


100%|██████████| 2/2 [00:00<00:00, 21.76it/s]


Epoch 27, Loss: 0.083614531904459


100%|██████████| 2/2 [00:00<00:00, 21.16it/s]


Epoch 28, Loss: 0.08357258886098862


100%|██████████| 2/2 [00:00<00:00, 21.25it/s]


Epoch 29, Loss: 0.08352800086140633


100%|██████████| 2/2 [00:00<00:00, 21.65it/s]


Epoch 30, Loss: 0.08348583057522774


100%|██████████| 2/2 [00:00<00:00, 21.80it/s]


Epoch 31, Loss: 0.08345488458871841


100%|██████████| 2/2 [00:00<00:00, 20.53it/s]


Epoch 32, Loss: 0.08345091342926025


100%|██████████| 2/2 [00:00<00:00, 20.94it/s]


Epoch 33, Loss: 0.08344615623354912


100%|██████████| 2/2 [00:00<00:00, 21.59it/s]


Epoch 34, Loss: 0.08344200626015663


100%|██████████| 2/2 [00:00<00:00, 21.87it/s]


Epoch 35, Loss: 0.08343765884637833


100%|██████████| 2/2 [00:00<00:00, 22.08it/s]


Epoch 36, Loss: 0.08343352377414703


100%|██████████| 2/2 [00:00<00:00, 21.47it/s]


Epoch 37, Loss: 0.08342905715107918


100%|██████████| 2/2 [00:00<00:00, 21.49it/s]


Epoch 38, Loss: 0.08342474699020386


100%|██████████| 2/2 [00:00<00:00, 21.36it/s]


Epoch 39, Loss: 0.08342060260474682


100%|██████████| 2/2 [00:00<00:00, 21.39it/s]


Epoch 40, Loss: 0.08341631852090359


100%|██████████| 2/2 [00:00<00:00, 21.65it/s]


Epoch 41, Loss: 0.08341292664408684


100%|██████████| 2/2 [00:00<00:00, 21.91it/s]


Epoch 42, Loss: 0.08341247960925102


100%|██████████| 2/2 [00:00<00:00, 21.28it/s]


Epoch 43, Loss: 0.08341202139854431


100%|██████████| 2/2 [00:00<00:00, 21.58it/s]


Epoch 44, Loss: 0.08341162092983723


100%|██████████| 2/2 [00:00<00:00, 21.55it/s]


Epoch 45, Loss: 0.08341103419661522


100%|██████████| 2/2 [00:00<00:00, 21.33it/s]


Epoch 46, Loss: 0.08341065049171448


100%|██████████| 2/2 [00:00<00:00, 22.09it/s]


Epoch 47, Loss: 0.08341028913855553


100%|██████████| 2/2 [00:00<00:00, 21.57it/s]


Epoch 48, Loss: 0.08340977132320404


100%|██████████| 2/2 [00:00<00:00, 21.97it/s]


Epoch 49, Loss: 0.08340926468372345


100%|██████████| 2/2 [00:00<00:00, 22.42it/s]


Epoch 50, Loss: 0.08340897969901562
Validation Loss: 0.248135045170784
Fold 5
       Train Index = 0, ..., 534 len=535
       Valid Index = 535, ..., 640 len=106
       Train Dates = 2014-02-23 00:00:00, ..., 2022-04-10 00:00:00 len=5
       Valid Dates = 2022-07-31 00:00:00, ..., 2023-12-17 00:00:00 len=8
  (get)Train Index = 103, ..., 527 len=5
  (get)Valid Index = 543, ..., 615 len=8
  evi_train.shape  = (5, 512, 512)
  evi_val.shape    = (8, 512, 512)
  time_features_train.shape  = (5, 6)
  time_features_val.shape    = (8, 6)
  labels_train.shape  = (5,)
  labels_val.shape    = (8,)
  (torch)evi_train.shape  = torch.Size([5, 1, 1, 512, 512])
  (torch)evi_val.shape    = torch.Size([8, 1, 1, 512, 512])


100%|██████████| 3/3 [00:00<00:00, 22.94it/s]


Epoch 1, Loss: 0.16929728537797928


100%|██████████| 3/3 [00:00<00:00, 23.57it/s]


Epoch 2, Loss: 0.17360803236564


100%|██████████| 3/3 [00:00<00:00, 23.41it/s]


Epoch 3, Loss: 0.17965763062238693


100%|██████████| 3/3 [00:00<00:00, 23.35it/s]


Epoch 4, Loss: 0.14718342572450638


100%|██████████| 3/3 [00:00<00:00, 23.09it/s]


Epoch 5, Loss: 0.13779729108015695


100%|██████████| 3/3 [00:00<00:00, 23.20it/s]


Epoch 6, Loss: 0.12136135001977284


100%|██████████| 3/3 [00:00<00:00, 23.96it/s]


Epoch 7, Loss: 0.11471111327409744


100%|██████████| 3/3 [00:00<00:00, 23.48it/s]


Epoch 8, Loss: 0.10664999485015869


100%|██████████| 3/3 [00:00<00:00, 23.89it/s]


Epoch 9, Loss: 0.12894625589251518


100%|██████████| 3/3 [00:00<00:00, 23.26it/s]


Epoch 10, Loss: 0.09851867457230885


100%|██████████| 3/3 [00:00<00:00, 23.12it/s]


Epoch 11, Loss: 0.09437990933656693


100%|██████████| 3/3 [00:00<00:00, 23.10it/s]


Epoch 12, Loss: 0.10975918422142665


100%|██████████| 3/3 [00:00<00:00, 23.77it/s]


Epoch 13, Loss: 0.09370982026060422


100%|██████████| 3/3 [00:00<00:00, 23.50it/s]


Epoch 14, Loss: 0.0944518509010474


100%|██████████| 3/3 [00:00<00:00, 23.31it/s]


Epoch 15, Loss: 0.09313013901313145


100%|██████████| 3/3 [00:00<00:00, 23.83it/s]


Epoch 16, Loss: 0.08456646542375286


100%|██████████| 3/3 [00:00<00:00, 23.11it/s]


Epoch 17, Loss: 0.08431454251209895


100%|██████████| 3/3 [00:00<00:00, 23.23it/s]


Epoch 18, Loss: 0.09234229226907094


100%|██████████| 3/3 [00:00<00:00, 23.18it/s]


Epoch 19, Loss: 0.10780388365189235


100%|██████████| 3/3 [00:00<00:00, 23.15it/s]


Epoch 20, Loss: 0.10758685568968455
Early stopping!
Validation Loss: 0.14833451341837645
Average MSE: 0.17643526196479797
Average RMSE: 0.41369152069091797
Average MAE: 0.3471130132675171
Average R-squared: -39942233052372.36


In [12]:
# save model to file
torch.save(model.state_dict(), "diego-bad-model.pt")
del model

## Inference

In [13]:
# load in model from file
inf_model_weights = torch.load("diego-bad-model.pt", weights_only=True)
inf_model = HybridModel(CNNFeatureExtractor())
inf_model.load_state_dict(inf_model_weights)
inf_model.to(device)
inf_model.eval()
inf_output = inf_model(evi_val, time_features_val)

print(f"{evi_val.shape = }")
print(f"{time_features_val.shape = }")
print(f"{inf_output.shape = }")

evi_val.shape = torch.Size([8, 1, 1, 512, 512])
time_features_val.shape = torch.Size([8, 6])
inf_output.shape = torch.Size([8, 512, 512])


What are the shapes above? Specifically the input?

[8, 1, 1, 512, 512]

[n_samples, ?, ?, width, height]


## Train on full dataset

In [14]:

# define the model 
model = HybridModel(CNNFeatureExtractor())
model.apply(weights_init)
model.to(device)

# define training criteria
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
criterion = nn.MSELoss()

In [15]:
# create full datasets for training
# make validation a lil bit smaller so we get a "better" model for inference?

# make our own train_index and val_index for now
n_train_samples = int(len(yield_data_weekly) * 0.9)
n_val_samples = len(yield_data_weekly) - n_train_samples
indices = np.arange(0, len(yield_data_weekly), 1, dtype=int)
train_index = indices[0:n_train_samples]
val_index = indices[n_train_samples:]
print(f"n_train_samples({len(train_index)}) + n_val_samples({len(val_index)}) == len(yield_weekly_data)({len(yield_data_weekly)}) = {len(train_index)+len(val_index) == len(yield_data_weekly)}")


# print(f"Train Indexes = {train_index}")
# print(f"Valid Indexes = {val_index}")

train_dates = yield_data_weekly.index[train_index].intersection(evi_data_dict.keys())
# val_dates = yield_data_weekly.index[val_index].intersection(evi_data_dict.keys())
# print(f"       Train Dates = {train_dates[0]}, ..., {train_dates[-1]} len={len(train_dates)}")
# print(f"       Valid Dates = {val_dates[0]}, ..., {val_dates[-1]} len={len(val_dates)}")

# train_index = yield_data_weekly.index.get_indexer(train_dates)
# val_index = yield_data_weekly.index.get_indexer(val_dates)
# print(f"  (get)Train Index = {train_index[0]}, ..., {train_index[-1]} len={len(train_index)}")
# print(f"  (get)Valid Index = {val_index[0]}, ..., {val_index[-1]} len={len(val_index)}")

# evi_train = np.array([evi_data_dict[date] for date in train_dates])
# evi_val = np.array([evi_data_dict[date] for date in val_dates])
# print(f"  evi_train.shape  = {evi_train.shape}")
# print(f"  evi_val.shape    = {evi_val.shape}")

# time_features_train = yield_data_weekly.loc[train_dates][['month_sin', 'month_cos', 'day_of_year_sin', 'day_of_year_cos', 'Volume (Pounds)', 'Cumulative Volumne (Pounds)']].values
# time_features_val = yield_data_weekly.loc[val_dates][['month_sin', 'month_cos', 'day_of_year_sin', 'day_of_year_cos', 'Volume (Pounds)', 'Cumulative Volumne (Pounds)']].values
# labels_train = yield_data_weekly.loc[train_dates]['Volume (Pounds)'].values
# labels_val = yield_data_weekly.loc[val_dates]['Volume (Pounds)'].values

# evi_train = torch.tensor(evi_train, dtype=torch.float32).unsqueeze(1).unsqueeze(2).to(device)
# time_features_train = torch.tensor(time_features_train, dtype=torch.float32).to(device)
# time_features_val = torch.tensor(time_features_val, dtype=torch.float32).to(device)
# labels_train = torch.tensor(labels_train, dtype=torch.float32).to(device)
# labels_val = torch.tensor(labels_val, dtype=torch.float32).to(device)

# train_loader = DataLoader(list(zip(evi_train, labels_train, time_features_train)), batch_size=2, shuffle=True)
# val_loader = DataLoader(list(zip(evi_val, labels_val, time_features_val)), batch_size=2, shuffle=False)


n_train_samples(576) + n_val_samples(65) == len(yield_weekly_data)(641) = True


In [16]:
evi_data_dict.keys()

dict_keys([Timestamp('2014-02-23 00:00:00'), Timestamp('2016-04-17 00:00:00'), Timestamp('2017-10-29 00:00:00'), Timestamp('2020-04-12 00:00:00'), Timestamp('2022-04-10 00:00:00'), Timestamp('2022-07-31 00:00:00'), Timestamp('2022-09-25 00:00:00'), Timestamp('2022-11-20 00:00:00'), Timestamp('2023-03-12 00:00:00'), Timestamp('2023-05-07 00:00:00'), Timestamp('2023-07-02 00:00:00'), Timestamp('2023-10-22 00:00:00'), Timestamp('2023-12-17 00:00:00')])

In [17]:
yield_data_weekly.index[train_index]

DatetimeIndex(['2012-03-04', '2012-03-11', '2012-03-18', '2012-03-25',
               '2012-04-01', '2012-04-08', '2012-04-15', '2012-04-22',
               '2012-04-29', '2012-05-06',
               ...
               '2023-01-08', '2023-01-15', '2023-01-22', '2023-01-29',
               '2023-02-05', '2023-02-12', '2023-02-19', '2023-02-26',
               '2023-03-05', '2023-03-12'],
              dtype='datetime64[ns]', name='Date', length=576, freq=None)