In [1]:
import pandas as pd
import numpy as np

import rasterio
from skimage.transform import resize
from skimage.transform import rotate
import os

import torch
from torch.utils.data import Dataset, DataLoader

import torch.nn as nn
import torch.nn.functional as F

from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

from sklearn.model_selection import train_test_split

from datetime import timedelta
from skimage.draw import polygon
import matplotlib.pyplot as plt

from shapely.geometry import Polygon

from utils import process_yield_data
from pathlib import Path

import matplotlib.pyplot as plt

#### Import Yield Data

In [2]:
YIELD_DATA_PATH = Path("./combined_yield_data.csv")
yield_data_weekly = process_yield_data(YIELD_DATA_PATH)

            Volume (Pounds)  Cumulative Volumne (Pounds)  Pounds/Acre
Date                                                                 
2012-01-02          23400.0                      23400.0          2.0
2012-01-03          26064.0                      49464.0          3.0
2012-01-04          32382.0                      81846.0          3.0
2012-01-05          69804.0                     151650.0          7.0
2012-01-06          18000.0                     169650.0          2.0

Number of Yield Data Points:  3970

Column Names: Index(['Volume (Pounds)', 'Cumulative Volumne (Pounds)', 'Pounds/Acre'], dtype='object')
Number of Yield Data Points: 2879
Yield data with time features:
            Volume (Pounds)  Cumulative Volumne (Pounds)  Pounds/Acre  \
Date                                                                    
2012-03-04         525753.0                    1785843.0    18.333333   
2012-03-11        2949534.0                    4735377.0    51.666667   
2012-03-18   

#### Define the Model

In [3]:
target_shape = (512, 512)
device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
print(f"Using {device} device")

Using mps device


### Old Model

In [4]:
class CNNFeatureExtractor(nn.Module):
    def __init__(self):
        super(CNNFeatureExtractor, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.conv4 = nn.Conv2d(128, 256, 3, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.5)
        self.flattened_size = self._get_conv_output((1, *target_shape))
        self.fc1 = nn.Linear(self.flattened_size, 512)

    def _get_conv_output(self, shape):
        x = torch.rand(1, *shape)
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        x = self.pool(F.relu(self.bn4(self.conv4(x))))
        n_size = x.view(1, -1).size(1)
        return n_size

    def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        x = self.pool(F.relu(self.bn4(self.conv4(x))))
        x = self.dropout(x)
        x = x.view(-1, self.flattened_size)
        x = F.relu(self.fc1(x))
        return x
    
class HybridModel(nn.Module):
    def __init__(self, cnn_feature_extractor, lstm_hidden_size=64, lstm_layers=1):
        super(HybridModel, self).__init__()
        self.cnn = cnn_feature_extractor
        self.lstm = nn.LSTM(input_size=512, hidden_size=lstm_hidden_size, num_layers=lstm_layers, batch_first=True)
        self.fc1 = nn.Linear(lstm_hidden_size + 4, 64)
        self.fc2 = nn.Linear(64, target_shape[0] * target_shape[1])  # Predict a value per pixel
        self.target_shape = target_shape

    def forward(self, x, time_features):
        batch_size, time_steps, C, H, W = x.size()
        c_in = x.view(batch_size * time_steps, C, H, W)
        c_out = self.cnn(c_in)
        r_in = c_out.view(batch_size, time_steps, -1)
        r_out, (h_n, c_n) = self.lstm(r_in)
        r_out = r_out[:, -1, :]
        x = torch.cat((r_out, time_features), dim=1)  # Concatenate LSTM output with time features
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        x = x.view(batch_size, *self.target_shape)  # Reshape to the target shape
        return x

#### Initialize Function

In [5]:
def weights_init(m):
    if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
        nn.init.kaiming_normal_(m.weight)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)

# # Instantiate model with weight decay regularization
# cnn_feature_extractor = CNNFeatureExtractor()
# model = HybridModel(cnn_feature_extractor)
# model.apply(weights_init)
# model.to(device)

batch_size = 16
epochs = 50

# criterion = nn.MSELoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.01)
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, verbose=True)

### Functions for prediction

In [6]:
from inference_utils import (
    preprocess_image,
    compute_mean_std,
    load_evi_data_and_prepare_features,
    find_closest_date,
    find_closest_date_in_df,
    mask_evi_data,
    predict,
    predict_weekly_yield,
    augment_image,
    prepare_dataset,
    train_and_evaluate,
    sync_evi_yield_data,
    CustomDataset,
    load_evi_data,
    find_common_date_range
)


In [7]:
# Load EVI data and prepare time features
evi_data_dir = "./landsat_evi_monterey_masked"
train_loader, val_loader, mean, std = prepare_dataset(evi_data_dir, yield_data_weekly, target_shape, augment=True)

Processed file 1/84 in 2.33s
Processed file 2/84 in 2.38s


### Model Evaluation (Cross Validation)

In [None]:
# Initialize TimeSeriesSplit for cross-validation
tscv = TimeSeriesSplit(n_splits=5)

mse_scores = []
rmse_scores = []
mae_scores = []
r2_scores = []

# Lists to store losses for plotting
all_train_losses = []
all_val_losses = []

epochs = 50
patience = 5 
early_stop = False

# Cross-validation with train_and_evaluate
for fold, (train_index, val_index) in enumerate(tscv.split(yield_data_weekly_filtered)):
    print(f"Fold {fold + 1}")

    # Check if indices are within bounds
    if max(train_index) >= len(dataset) or max(val_index) >= len(dataset):
        print(f"Error: Indices out of range for fold {fold + 1}")
        continue 

    # Create Subsets for the current fold
    fold_train_subset = torch.utils.data.Subset(dataset, train_index)
    fold_val_subset = torch.utils.data.Subset(dataset, val_index)

    # DataLoaders for the current fold
    fold_train_loader = DataLoader(fold_train_subset, batch_size=batch_size, shuffle=True)
    fold_val_loader = DataLoader(fold_val_subset, batch_size=batch_size, shuffle=False)

    # Instantiate a new model for each fold
    model = HybridModel(CNNFeatureExtractor())
    model.apply(weights_init)
    model.to(device)

    # Set up the optimizer, scheduler, and loss function
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    criterion = nn.MSELoss()

    # Train and evaluate the model using the function
    val_loss, train_losses, val_losses = train_and_evaluate(
        model, fold_train_loader, fold_val_loader, optimizer, scheduler, criterion, epochs, device
    )
    
    # Store the train and val losses for visualization
    all_train_losses.append(train_losses)
    all_val_losses.append(val_losses)

    # Model evaluation on the validation set
    model.eval()
    with torch.no_grad():
        outputs_val = []
        labels_val = []
        for evi_batch, label_batch, time_features_batch, timestamp in fold_val_loader:
            evi_batch, label_batch, time_features_batch = evi_batch.to(device), label_batch.to(device), time_features_batch.to(device)
            outputs_batch = model(evi_batch, time_features_batch) # lbs/pixel
            outputs_val.extend(outputs_batch.cpu().numpy().flatten())
            label_batch = label_batch.unsqueeze(1).unsqueeze(2).expand(-1, target_shape[0], target_shape[1])
            labels_val.extend(label_batch.cpu().numpy().flatten())

    # Flatten the outputs and labels
    outputs_val = np.array(outputs_val)
    labels_val = np.array(labels_val)

    # Calculate val metrics
    mse = mean_squared_error(labels_val, outputs_val)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(labels_val, outputs_val)
    r2 = r2_score(labels_val, outputs_val)

    mse_scores.append(mse)
    rmse_scores.append(rmse)
    mae_scores.append(mae)
    r2_scores.append(r2)

# Print results
print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Average MAE: {np.mean(mae_scores)}")
print(f"Average R-squared: {np.mean(r2_scores)}")

Fold 1
# of samples - Training   - 92
# of samples - Validation - 92


100%|██████████| 6/6 [00:12<00:00,  2.15s/it]


Epoch 1, Loss: 0.1392750802139441




Validation Loss: 0.15088109113276005


100%|██████████| 6/6 [00:13<00:00,  2.32s/it]


Epoch 2, Loss: 0.059082427993416786




Validation Loss: 0.13904905691742897


100%|██████████| 6/6 [00:13<00:00,  2.27s/it]


Epoch 3, Loss: 0.04768019045392672




Validation Loss: 0.12922726882000765


100%|██████████| 6/6 [00:13<00:00,  2.25s/it]


Epoch 4, Loss: 0.03847360424697399




Validation Loss: 0.12122050548593204


100%|██████████| 6/6 [00:13<00:00,  2.25s/it]


Epoch 5, Loss: 0.031420035287737846




Validation Loss: 0.11493411132444938


100%|██████████| 6/6 [00:13<00:00,  2.22s/it]


Epoch 6, Loss: 0.025797039270401




Validation Loss: 0.110035658814013


100%|██████████| 6/6 [00:13<00:00,  2.25s/it]


Epoch 7, Loss: 0.02143398703386386




Validation Loss: 0.10613545356318355


100%|██████████| 6/6 [00:13<00:00,  2.28s/it]


Epoch 8, Loss: 0.018007692880928516




Validation Loss: 0.1030192665445308


100%|██████████| 6/6 [00:14<00:00,  2.40s/it]


Epoch 9, Loss: 0.015197766479104757




Validation Loss: 0.1004763983655721


100%|██████████| 6/6 [00:13<00:00,  2.25s/it]


Epoch 10, Loss: 0.012881379729757706




Validation Loss: 0.09834382326031725


100%|██████████| 6/6 [00:12<00:00,  2.14s/it]


Epoch 11, Loss: 0.011081621206055084




Validation Loss: 0.09656183359523614


100%|██████████| 6/6 [00:13<00:00,  2.18s/it]


Epoch 12, Loss: 0.009470662179713448




Validation Loss: 0.09507046205302079


100%|██████████| 6/6 [00:13<00:00,  2.25s/it]


Epoch 13, Loss: 0.0080610989437749




Validation Loss: 0.0938120053615421


100%|██████████| 6/6 [00:13<00:00,  2.30s/it]


Epoch 14, Loss: 0.006942073271299402




Validation Loss: 0.09274125064257532


100%|██████████| 6/6 [00:13<00:00,  2.28s/it]


Epoch 15, Loss: 0.006069037675236662




Validation Loss: 0.0918656592645372


100%|██████████| 6/6 [00:13<00:00,  2.21s/it]


Epoch 16, Loss: 0.005250375407437484




Validation Loss: 0.09110569635716577


100%|██████████| 6/6 [00:13<00:00,  2.22s/it]


Epoch 17, Loss: 0.0045215664043401676




Validation Loss: 0.09046132559888065


100%|██████████| 6/6 [00:13<00:00,  2.27s/it]


Epoch 18, Loss: 0.0039587978584071




Validation Loss: 0.08991339181860288


100%|██████████| 6/6 [00:13<00:00,  2.27s/it]


Epoch 19, Loss: 0.003511349942224721




Validation Loss: 0.08944726968184114


100%|██████████| 6/6 [00:13<00:00,  2.23s/it]


Epoch 20, Loss: 0.0030762553215026855




Validation Loss: 0.08904255616168182


100%|██████████| 6/6 [00:13<00:00,  2.27s/it]


Epoch 21, Loss: 0.0027088017280523977




Validation Loss: 0.08869363482032593


100%|██████████| 6/6 [00:13<00:00,  2.17s/it]


Epoch 22, Loss: 0.002365640209366878




Validation Loss: 0.08839250441330175


100%|██████████| 6/6 [00:13<00:00,  2.22s/it]


Epoch 23, Loss: 0.0021392160172884664




Validation Loss: 0.08814054507335338


100%|██████████| 6/6 [00:12<00:00,  2.15s/it]


Epoch 24, Loss: 0.0019075937646751602




Validation Loss: 0.08791398036798152


100%|██████████| 6/6 [00:12<00:00,  2.06s/it]


Epoch 25, Loss: 0.0017018378324185808




Validation Loss: 0.0877148267463781


100%|██████████| 6/6 [00:12<00:00,  2.12s/it]


Epoch 26, Loss: 0.001532158562137435




Validation Loss: 0.08754231201601215


100%|██████████| 6/6 [00:13<00:00,  2.21s/it]


Epoch 27, Loss: 0.001377836432463179




Validation Loss: 0.0873864888950872


100%|██████████| 6/6 [00:12<00:00,  2.03s/it]


Epoch 28, Loss: 0.0012318506487645209




Validation Loss: 0.08725249197353453


100%|██████████| 6/6 [00:12<00:00,  2.01s/it]


Epoch 29, Loss: 0.0010997917270287871




Validation Loss: 0.08713253090293922


100%|██████████| 6/6 [00:12<00:00,  2.03s/it]


Epoch 30, Loss: 0.0009909336125322927




Validation Loss: 0.08702898602738667


100%|██████████| 6/6 [00:12<00:00,  2.05s/it]


Epoch 31, Loss: 0.0009142217168118805




Validation Loss: 0.08742140980515008


100%|██████████| 6/6 [00:12<00:00,  2.07s/it]


Epoch 32, Loss: 0.0009022974700201303




Validation Loss: 0.08685526343955037


100%|██████████| 6/6 [00:12<00:00,  2.07s/it]


Epoch 33, Loss: 0.0007497792151601365




Validation Loss: 0.08678160300769377


100%|██████████| 6/6 [00:12<00:00,  2.10s/it]


Epoch 34, Loss: 0.0006819163584926476




Validation Loss: 0.08671519188404393


100%|██████████| 6/6 [00:13<00:00,  2.22s/it]


Epoch 35, Loss: 0.0006311551163283488




Validation Loss: 0.08665586352193107


100%|██████████| 6/6 [00:13<00:00,  2.25s/it]


Epoch 36, Loss: 0.0005653277961149191




Validation Loss: 0.08660182503808755


100%|██████████| 6/6 [00:13<00:00,  2.27s/it]


Epoch 37, Loss: 0.0005211208820886289




Validation Loss: 0.08655407946450093


100%|██████████| 6/6 [00:13<00:00,  2.31s/it]


Epoch 38, Loss: 0.0004797028814209625




Validation Loss: 0.08651077787847801


100%|██████████| 6/6 [00:12<00:00,  2.11s/it]


Epoch 39, Loss: 0.00044470799427169066




Validation Loss: 0.0864724761825831


100%|██████████| 6/6 [00:13<00:00,  2.21s/it]


Epoch 40, Loss: 0.00040765097461796057




Validation Loss: 0.08643727105421324


100%|██████████| 6/6 [00:13<00:00,  2.18s/it]


Epoch 41, Loss: 0.00037517470385258395




Validation Loss: 0.08640456290837999


100%|██████████| 6/6 [00:12<00:00,  2.10s/it]


Epoch 42, Loss: 0.00034566652417803806




Validation Loss: 0.08637501748065309


100%|██████████| 6/6 [00:12<00:00,  2.06s/it]


Epoch 43, Loss: 0.00031797963796028245




Validation Loss: 0.08634877066288027


100%|██████████| 6/6 [00:13<00:00,  2.17s/it]


Epoch 44, Loss: 0.0002932548280417298




Validation Loss: 0.08632458194188075


100%|██████████| 6/6 [00:12<00:00,  2.14s/it]


Epoch 45, Loss: 0.00027206955807438743




Validation Loss: 0.086302960380029


100%|██████████| 6/6 [00:12<00:00,  2.15s/it]


Epoch 46, Loss: 0.00025676348013803363




Validation Loss: 0.08628252052343062


100%|██████████| 6/6 [00:12<00:00,  2.07s/it]


Epoch 47, Loss: 0.00023459245858248323




Validation Loss: 0.08626394491269214


100%|██████████| 6/6 [00:12<00:00,  2.07s/it]


Epoch 48, Loss: 0.00021819792891619727




Validation Loss: 0.08624700288297997


100%|██████████| 6/6 [00:13<00:00,  2.18s/it]


Epoch 49, Loss: 0.00020215726544847712




Validation Loss: 0.0862312904864666


100%|██████████| 6/6 [00:13<00:00,  2.19s/it]


Epoch 50, Loss: 0.00018816771868538731
Validation Loss: 0.08621723754913546




TypeError: cannot unpack non-iterable float object

### Plot Loss Across Folds

In [None]:
for fold, (train_losses, val_losses) in enumerate(zip(all_train_losses, all_val_losses)):
    plt.plot(train_losses, label=f'Train Loss Fold {fold + 1}')
    plt.plot(val_losses, label=f'Val Loss Fold {fold + 1}', linestyle='--')

plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss Across Folds')
plt.legend()
plt.grid(True)
plt.show()

# Train on full dataset

In [None]:
# Instantiate a new model for each fold
model = HybridModel(CNNFeatureExtractor())
model.apply(weights_init)
model.to(device)

# Set up the optimizer, scheduler, and loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
criterion = nn.MSELoss()

# Train and evaluate the model
val_loss = train_and_evaluate(model, train_loader, val_loader, optimizer, scheduler, criterion, epochs, device)

torch.save(model.state_dict(), "./trained-full-dataset-yield-density-no-leakage.pt")


# of samples - Training   - 510
# of samples - Validation - 128


  evi_sequence = torch.tensor(evi_sequence, dtype=torch.float32).unsqueeze(1)
100%|██████████| 128/128 [01:03<00:00,  2.01it/s]


Epoch 1, Loss: 0.02303801325973609




Validation Loss: 0.13716325513087213


100%|██████████| 128/128 [01:00<00:00,  2.11it/s]


Epoch 2, Loss: 0.00021912637054555262




Validation Loss: 0.13683857419528067


100%|██████████| 128/128 [01:00<00:00,  2.11it/s]


Epoch 3, Loss: 4.714069107225605e-05




Validation Loss: 0.13679931915248744


100%|██████████| 128/128 [01:00<00:00,  2.11it/s]


Epoch 4, Loss: 1.2139776693148585e-05




Validation Loss: 0.13678972469642758


100%|██████████| 128/128 [01:00<00:00,  2.11it/s]


Epoch 5, Loss: 2.449644118566064e-06




Validation Loss: 0.13678752846317366


100%|██████████| 128/128 [01:00<00:00,  2.11it/s]


Epoch 6, Loss: 7.677901634311471e-07




Validation Loss: 0.13678681483725086


100%|██████████| 128/128 [01:00<00:00,  2.10it/s]


Epoch 7, Loss: 3.032403498042413e-07




Validation Loss: 0.13678655843250453


100%|██████████| 128/128 [01:01<00:00,  2.10it/s]


Epoch 8, Loss: 1.6920563471189976e-07




Validation Loss: 0.13678646262269467


100%|██████████| 128/128 [01:00<00:00,  2.10it/s]


Epoch 9, Loss: 1.0260573640036297e-07




Validation Loss: 0.13678642455488443


100%|██████████| 128/128 [01:00<00:00,  2.11it/s]


Epoch 10, Loss: 5.5787742407435725e-08




Validation Loss: 0.1367864032217767


100%|██████████| 128/128 [01:00<00:00,  2.11it/s]


Epoch 11, Loss: 2.9276606201833477e-08




Validation Loss: 0.13678638232522644


100%|██████████| 128/128 [01:00<00:00,  2.11it/s]


Epoch 12, Loss: 1.2652823795644697e-08




Validation Loss: 0.1367863641353324


100%|██████████| 128/128 [01:00<00:00,  2.11it/s]


Epoch 13, Loss: 4.771049414917239e-09




Validation Loss: 0.13678636751137674


100%|██████████| 128/128 [01:00<00:00,  2.12it/s]


Epoch 14, Loss: 1.5382479281230571e-09




Validation Loss: 0.13678635086398572


100%|██████████| 128/128 [01:00<00:00,  2.12it/s]


Epoch 15, Loss: 4.786846412416397e-10




Validation Loss: 0.13678636238910258


100%|██████████| 128/128 [01:00<00:00,  2.11it/s]


Epoch 16, Loss: 1.3792824101279723e-10




Validation Loss: 0.1367863685300108


100%|██████████| 128/128 [01:00<00:00,  2.12it/s]


Epoch 17, Loss: 3.7930990881324296e-11




Validation Loss: 0.1367863482737448


100%|██████████| 128/128 [01:00<00:00,  2.11it/s]


Epoch 18, Loss: 1.030097532846283e-11




Validation Loss: 0.13678636791883036


100%|██████████| 128/128 [01:00<00:00,  2.11it/s]


Epoch 19, Loss: 3.4419171131595274e-12




Validation Loss: 0.1367863736813888


100%|██████████| 128/128 [01:00<00:00,  2.11it/s]


Epoch 20, Loss: 1.862709280804205e-12




Validation Loss: 0.13678635843098164


100%|██████████| 128/128 [01:00<00:00,  2.12it/s]


Epoch 21, Loss: 1.5365051628925621e-12




Validation Loss: 0.1367863556370139


100%|██████████| 128/128 [01:00<00:00,  2.12it/s]


Epoch 22, Loss: 1.483005749043814e-12
Validation Loss: 0.1367863569757901
Early stopping!




In [None]:
# Loss Curve
# Assuming you have these lists from your training process
train_losses = []  # Fill this with training loss for each epoch
val_losses = []    # Fill this with validation loss for each epoch

# Plotting the loss curves
plt.figure(figsize=(10, 5))
plt.plot(range(1, len(train_losses) + 1), train_losses, label='Training Loss')
plt.plot(range(1, len(val_losses) + 1), val_losses, label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss over Epochs')
plt.legend()
plt.grid(True)
plt.show()

# Predicted vs. Actual Values
predicted_values = []  # Fill this with predicted values
actual_values = []     # Fill this with actual values

# Plotting predicted vs actual values
plt.figure(figsize=(10, 5))
plt.scatter(actual_values, predicted_values, alpha=0.5)
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.title('Predicted vs. Actual Values')
plt.grid(True)
plt.plot([min(actual_values), max(actual_values)], [min(actual_values), max(actual_values)], 'r')  # Line y=x
plt.show()

# Residuals Plot
residuals = np.array(actual_values) - np.array(predicted_values)

# Plotting the residuals
plt.figure(figsize=(10, 5))
plt.scatter(range(len(residuals)), residuals, alpha=0.5)
plt.axhline(y=0, color='r', linestyle='-')
plt.xlabel('Index')
plt.ylabel('Residual')
plt.title('Residuals of Predictions')
plt.grid(True)
plt.show()

# Inference

In [None]:
import joblib

# load in model from file
# inf_model_weights = torch.load("trained-full-dataset.pt", weights_only=True)
inf_model_weights = torch.load("trained-full-dataset-yield-density-no-leakage.pt", weights_only=True)
inf_model = HybridModel(CNNFeatureExtractor())
inf_model.load_state_dict(inf_model_weights)
inf_model.to(device)
inf_model.eval()

scaler = joblib.load("yield_scaler.save")

In [None]:

# inf_output = inf_model(evi_val, time_features_val)

# print(f"{evi_val.shape = }")
# print(f"{time_features_val.shape = }")
# print(f"{inf_output.shape = }")

In [None]:
yield_data_weekly.iloc[0].name

Timestamp('2012-03-04 00:00:00')

In [None]:

evi_data_dir = "./landsat_evi_monterey_masked"
dataset_loader, _, mean, std = prepare_dataset(evi_data_dir, yield_data_weekly, target_shape, augment=True, full=True)

Processed file 1/83 in 4.125449s
Processed file 2/83 in 4.246559s
Processed file 3/83 in 4.898093s
Processed file 4/83 in 4.672359s
Processed file 5/83 in 5.460909s
Processed file 6/83 in 4.038937s
Processed file 7/83 in 3.490030s
Processed file 8/83 in 4.127463s
Processed file 9/83 in 3.800272s
Processed file 10/83 in 3.523895s
Processed file 11/83 in 3.932964s
Processed file 12/83 in 3.734574s
Processed file 13/83 in 4.148887s
Processed file 14/83 in 4.548136s
Processed file 15/83 in 3.748950s
Processed file 16/83 in 3.109021s
Processed file 17/83 in 4.015578s
Processed file 18/83 in 4.144617s
Processed file 19/83 in 3.917441s
Processed file 20/83 in 4.372727s
Processed file 21/83 in 4.537723s
Processed file 22/83 in 3.373776s
Processed file 23/83 in 3.701019s
Processed file 24/83 in 3.277262s
Processed file 25/83 in 3.982489s
Processed file 26/83 in 4.063423s
Processed file 27/83 in 3.969857s
Processed file 28/83 in 3.823087s
Processed file 29/83 in 4.417579s
Processed file 30/83 in

In [None]:
timestamps = torch.Tensor()
yield_labels = torch.Tensor()
predictions = torch.Tensor()

for idx, (inputs, labels, time_features, timestamp) in enumerate(dataset_loader):
    print(f"Running inference... {idx/len(dataset_loader)*100:.2f}%", end='\r')
    inputs, labels, time_features = inputs.to(device), labels.to(device), time_features.to(device)
    outputs = inf_model(inputs, time_features)
    summed_outputs = outputs.sum(dim=(1,2))

    if idx >0:
        break
    timestamps = torch.cat((timestamps, timestamp))
    yield_labels = torch.cat((yield_labels, labels.to("cpu")))
    predictions = torch.cat((predictions, summed_outputs.to("cpu")))

    # loss = criterion(outputs, labels)
    # val_loss += loss.item()

# val_loss /= len(val_loader)
# print(f'Validation Loss: {val_loss}')

Running inference... 0.62%

In [None]:
yield_labels.reshape(-1,1)

tensor([0.4385, 0.8434, 0.4935, 0.0000])

In [None]:
scaler.inverse_transform(yield_labels.reshape(-1, 1))

array([[20429036.3072927 ],
       [39287413.27969694],
       [22990931.39602876],
       [       0.        ]])

In [None]:
scaler.inverse_transform(predictions.detach().numpy().reshape(-1,1))

array([[1023663.94],
       [1020795.2 ],
       [1033442.56],
       [ 977205.5 ]], dtype=float32)

In [None]:
yield_labels

tensor([0.4385, 0.8434, 0.4935, 0.0000])

In [None]:
yield_data_weekly

Unnamed: 0_level_0,Volume (Pounds),Cumulative Volumne (Pounds),Pounds/Acre,month_sin,month_cos,day_of_year_sin,day_of_year_cos
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2012-03-04,0.011286,1785843.0,18.333333,1.000000e+00,6.123234e-17,0.891981,0.452072
2012-03-11,0.063317,4735377.0,51.666667,1.000000e+00,6.123234e-17,0.939856,0.341571
2012-03-18,0.102446,9507645.0,83.500000,1.000000e+00,6.123234e-17,0.974100,0.226116
2012-03-25,0.067456,12649959.0,55.000000,1.000000e+00,6.123234e-17,0.994218,0.107381
2012-04-01,0.134627,18921357.0,93.857143,8.660254e-01,-5.000000e-01,0.999917,-0.012910
...,...,...,...,...,...,...,...
2024-05-12,0.767907,682790517.0,305.285714,5.000000e-01,-8.660254e-01,0.752667,-0.658402
2024-05-19,0.787426,682790517.0,365.166667,5.000000e-01,-8.660254e-01,0.668064,-0.744104
2024-05-26,0.827681,682790517.0,329.285714,5.000000e-01,-8.660254e-01,0.573772,-0.819015
2024-06-02,0.796377,682790517.0,316.571429,1.224647e-16,-1.000000e+00,0.471160,-0.882048


In [None]:
timestamps, yield_labels, predictions

In [None]:
yield_data_weekly

In [None]:
out_df = pd.DataFrame(data={"timestamp":timestamps.to_numpy(), "prediction":predictions.to_numpy(), "truth":yield_labels.to_numpy()})
out_df.to_csv("out.csv")