# EXERCISE 7.2: Deep Learning for Crop Yield Estimation - Experiment Tracking

---

**Use Google Earth Engine with US geodata to train a CNN in PyTorch that predicts crop yield, within a systematic experiment-tracking environment based on Weights & Biases (WandB).**





In [None]:
!pip install geemap -qqq

In [None]:
!pip install wandb -qqq

## Setup
Before working on this exercise, set up a GCP project with the GEE and Google Drive APIs enabled by following the instructions given at https://docs.google.com/document/d/13SKLn_mqhlaRc1gElr4kmBrkw6KZPeqDDW3AjcTr8YY/

In [None]:
# Mount Google Drive into the Colab runtime; the dataset loaded further down
# is read from a path under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import ee
import time
import sys
import numpy as np
import pandas as pd
import itertools
import os
import traceback
import urllib
import folium

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from oauth2client.service_account import ServiceAccountCredentials

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy import optimize
import torch.nn as nn
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, random_split
from tqdm import tqdm
from collections import defaultdict
import geemap
import wandb
from torchvision.utils import make_grid

## Setup Your Google Earth Engine Credentials
Upload the `.private-key.json` you created while setting up GEE to the current runtime. Click Files > Upload to Session storage on the left pane in this notebook to upload. <br/>
Replace the service account in the code below with your Google Cloud project service account email. It should be of the format <br/>`<id>@ml4eo-<some_number>.iam.gserviceaccount.com`

In [None]:
# Service-account e-mail of the Google Cloud project; replace with your own.
service_account = 'ml4eo-service@ml4eo-383508.iam.gserviceaccount.com'
# The key file path is relative, so `.private-key.json` must sit in the
# current working directory of the runtime.
credentials = ee.ServiceAccountCredentials(service_account, '.private-key.json')
# Initialise the Earth Engine client with these credentials.
ee.Initialize(credentials)


# Discussion
Before we start:
Is crop yield estimation a classification or a regression task? Why?

In [None]:
# Log this session in to Weights & Biases before any run is created.
wandb.login()

In [None]:
# Load the data
# Read the five arrays stored in the .npz archive into module-level globals
# used by the functions below. The file is closed when the `with` block ends.
# NOTE(review): `images` is presumably laid out as (sample, channel, H, W),
# since `normalize_images` averages over axes (0, 2, 3) - confirm.
with np.load("/content/drive/MyDrive/histogram_all_full.npz") as hist:
    images = hist["output_image"]
    locations = hist["output_locations"]
    yields = hist["output_yield"]
    years = hist["output_year"]
    indices = hist["output_index"]


In [None]:
# Run on the first CUDA GPU when one is available, otherwise on the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

## Image Standardization
We will center all images by subtracting the per-channel mean, so that every channel has zero mean over the training images. The following function does that.

The mean is computed on the training images.

In [None]:
# Years to predict. `run` loops over this list; for each entry the model is
# trained on strictly earlier years and evaluated on that year.
pred_years = [2012]

In [None]:
def normalize_images(train_images, val_images):
    """Center two image sets with the per-channel mean of the training set.

    Args:
        train_images: 4-D array whose axis 1 is the channel axis. The mean is
            computed from this array only.
        val_images: 4-D array with the same number of channels; it is shifted
            by the training mean, not by its own.

    Returns:
        Tuple ``(train_images, val_images)`` of new, mean-subtracted arrays
        with the same shapes as the inputs.
    """
    # One mean per channel, pooled over axes 0, 2 and 3.
    channel_mean = np.mean(train_images, axis=(0, 2, 3))
    # Shape (1, C, 1, 1) broadcasts along the channel axis of both inputs.
    offset = channel_mean.reshape(1, -1, 1, 1)

    return train_images - offset, val_images - offset

# Define the Config Dictionary for Wandb

In [None]:
# Hyperparameters of the experiment. This dictionary is passed to
# `wandb.init`, so it should describe the model/training that actually runs.
config = {
    # Each 'convN' entry describes one convolutional layer. The 'in_chan' of a
    # layer equals the 'out_chan' of the layer before it.
    'conv1': {
        'in_chan': 9,
        'out_chan': 128,
        'stride': (1, 1),
        'kernel_size': (3, 3),
    },
    'conv2': {
        'in_chan': 128,
        'out_chan': 256,
        'stride': (2, 2),
        'kernel_size': (3, 3),
    },
    'conv3': {
        'in_chan': 256,
        'out_chan': 256,
        'stride': (1, 1),
        'kernel_size': (3, 3),
    },
    'conv4': {
        'in_chan': 256,
        'out_chan': 512,
        'stride': (2, 2),
        'kernel_size': (3, 3),
    },
    'conv5': {
        'in_chan': 512,
        'out_chan': 512,
        'stride': (1, 1),
        'kernel_size': (3, 3),
    },
    'conv6': {
        'in_chan': 512,
        'out_chan': 512,
        'stride': (2, 2),
        'kernel_size': (3, 3),
    },
    # 512 channels x 4 x 4 spatial positions: a 32x32 input is halved by each
    # of the three stride-2 layers.
    'fc1_in': 8192,
    'fc1_out': 2048,
    # Yield estimation is a regression with one target value per sample; the
    # model's last layer and the evaluation code both use a single output.
    'fc2_out': 1,
    'dropout_rate': 0.5,
    'lr': 0.01,
    'train_steps': 25000,
    'criterion': 'mse',
    'optimizer': 'adam',
}

In [None]:
#@title
class Conv2dSamePadding(nn.Conv2d):
    """`nn.Conv2d` whose forward pass is delegated to `conv2d_same_padding`.

    The layer's own weight, bias, stride, dilation and groups are forwarded;
    the padding is computed by `conv2d_same_padding` from the input size.
    """

    def forward(self, input):
        """Convolve `input` with this layer's parameters and return the result."""
        conv_options = dict(
            bias=self.bias,
            stride=self.stride,
            dilation=self.dilation,
            groups=self.groups,
        )
        return conv2d_same_padding(input, self.weight, **conv_options)


def conv2d_same_padding(input, weight, bias=None, stride=1, dilation=1, groups=1):
    """Apply a 2-D convolution padded so the output size is ceil(size / stride).

    Args:
        input: 4-D tensor; axes 2 and 3 are treated as rows and columns.
        weight: 4-D filter tensor; axes 2 and 3 give the kernel height/width.
        bias: Optional bias tensor passed through to `F.conv2d`.
        stride: Int or (rows, cols) pair.
        dilation: Int or (rows, cols) pair.
        groups: Passed through to `F.conv2d`.

    Returns:
        The result of `F.conv2d` on the padded input.
    """
    # The defaults are plain ints, but the arithmetic below indexes per axis,
    # so promote ints to (rows, cols) pairs.
    if isinstance(stride, int):
        stride = (stride, stride)
    if isinstance(dilation, int):
        dilation = (dilation, dilation)

    def _total_padding(in_size, kernel_size, step, dil):
        # Total padding needed along one axis for ceil(in_size / step) outputs.
        effective_kernel = (kernel_size - 1) * dil + 1
        out_size = (in_size + step - 1) // step
        return max(0, (out_size - 1) * step + effective_kernel - in_size)

    padding_rows = _total_padding(
        input.size(2), weight.size(2), stride[0], dilation[0]
    )
    padding_cols = _total_padding(
        input.size(3), weight.size(3), stride[1], dilation[1]
    )
    rows_odd = padding_rows % 2 != 0
    cols_odd = padding_cols % 2 != 0

    # `F.conv2d` only pads symmetrically; when the total is odd, add the extra
    # row/column at the bottom/right first.
    if rows_odd or cols_odd:
        input = F.pad(input, [0, int(cols_odd), 0, int(rows_odd)])

    return F.conv2d(
        input,
        weight,
        bias,
        stride,
        padding=(padding_rows // 2, padding_cols // 2),
        dilation=dilation,
        groups=groups,
    )

In [None]:
# Start a W&B run in the 'DL_Crop_yield' project and attach `config` to it so
# the hyperparameters are recorded with the run.
wandb.init(project='DL_Crop_yield', config=config)

### Define the Model
The following class contains 8 layers (6 convolutional layers + 2 linear layers). Each convolutional layer is followed by a batch normalization unit, a Rectified Linear Unit, and a dropout.

This network takes an image with a dimension of 32x32, and 9 channels. That is, the expected input is 9x32x32. Note that Pytorch Conv2d expects the channels to be the first axis by default.

In [None]:
class ConvNet(nn.Module):
    """CNN regressor: six conv blocks followed by two linear layers.

    Every conv block is conv -> batch norm -> ReLU -> dropout. Layer widths,
    kernel sizes, strides, the dropout rate and the first linear layer's sizes
    are all read from the module-level `config` dictionary, so the model that
    is built always matches the configuration attached to the W&B run.

    Attribute names (`conv1`, `bn1`, ..., `fc1`, `fc2`) are part of the
    state-dict keys and must stay stable.
    """

    # Number of conv blocks; `config` must provide 'conv1' ... 'conv6'.
    NUM_CONV_BLOCKS = 6

    def __init__(self):
        super().__init__()
        for idx in range(1, self.NUM_CONV_BLOCKS + 1):
            layer_cfg = config[f'conv{idx}']
            setattr(self, f'conv{idx}', Conv2dSamePadding(
                layer_cfg['in_chan'],
                layer_cfg['out_chan'],
                kernel_size=layer_cfg['kernel_size'],
                stride=layer_cfg['stride'],
            ))
            # Batch norm width must equal the conv layer's output channels.
            setattr(self, f'bn{idx}', nn.BatchNorm2d(
                layer_cfg['out_chan'],
                eps=1e-05,
                momentum=0.1,
                affine=True,
                track_running_stats=True,
            ))
            setattr(self, f'relu{idx}', nn.ReLU())
            setattr(self, f'drop{idx}', nn.Dropout(
                p=config['dropout_rate'], inplace=False
            ))

        self.fc1 = nn.Linear(
            in_features=config['fc1_in'],
            out_features=config['fc1_out'],
            bias=True,
        )
        self.relu7 = nn.ReLU()
        # The output width stays fixed at 1: the evaluation code in this file
        # squeezes dimension 1 of the prediction, which needs a single column.
        self.fc2 = nn.Linear(
            in_features=config['fc1_out'], out_features=1, bias=True
        )

    def forward(self, x):
        """Return one predicted value per sample, shape (batch, 1)."""
        for idx in range(1, self.NUM_CONV_BLOCKS + 1):
            x = getattr(self, f'conv{idx}')(x)
            x = getattr(self, f'bn{idx}')(x)
            x = getattr(self, f'relu{idx}')(x)
            x = getattr(self, f'drop{idx}')(x)

        # Flatten everything except the batch axis for the linear layers.
        x = x.view(x.shape[0], -1)

        x = self.relu7(self.fc1(x))
        x = self.fc2(x)
        return x


## Question 7.2.1
Some hyperparameters of the model above are not controlled by the `config` dictionary. Please go up and update the model so that all of the model's hyperparameters are taken from the `config` dictionary.

## Training
In this training loop we randomly split the data into two sets (training and validation), and create a data loader, an optimizer, and a loss function. Then we use these objects to train our model. The function is thoroughly commented; please read it to understand what it is doing.


In [None]:
def train(model, train_data, train_steps,batch_size, lr):
    """Train `model` and restore the weights with the lowest validation loss.

    Args:
        model: The network to optimise in place.
        train_data: Sequence whose first two items are the input tensor and
            the target tensor; 10% of the samples are held out for validation.
        train_steps: Target number of optimisation steps, converted to a whole
            number of epochs.
        batch_size: Mini-batch size for both data loaders.
        lr: Accepted for interface compatibility but not used; the optimiser's
            learning rate is taken from ``config['lr']``.

    Returns:
        Tuple ``(train_scores, val_scores)``; each maps ``"loss"`` to the list
        of per-batch losses collected over all epochs.

    Raises:
        ValueError: If ``config['optimizer']`` or ``config['criterion']`` names
            something other than the supported ``'adam'`` / ``'mse'``.
    """
    # Fail early instead of hitting an unbound variable deep in the loop.
    if config['optimizer'] != 'adam':
        raise ValueError(
            "Unsupported optimizer {!r}; only 'adam' is implemented".format(
                config['optimizer']
            )
        )
    if config['criterion'] != 'mse':
        raise ValueError(
            "Unsupported criterion {!r}; only 'mse' is implemented".format(
                config['criterion']
            )
        )

    def _snapshot_weights():
        # `state_dict()` returns references to the live tensors, so clone them
        # to freeze the weights as they are right now.
        return {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }

    # Determine the number of samples we have
    total_size = train_data[0].shape[0]
    # Use 10% of the data for validation and keep the rest for training
    val_size = total_size // 10
    train_size = total_size - val_size

    # Split the dataset into training and validation sets
    train_dataset, val_dataset = random_split(
        TensorDataset(train_data[0], train_data[1]), (train_size, val_size)
    )
    train_dataloader = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True
    )
    val_dataloader = DataLoader(val_dataset, batch_size=batch_size)

    optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])
    # MSE loss is widely used for regression tasks
    criterion = nn.MSELoss()

    # Determine how many times the model should see each data point.
    # NOTE: this divides by the full sample count (training + validation), so
    # the number of optimiser steps actually taken is below `train_steps`.
    num_epochs = int(train_steps / (total_size / batch_size))
    print(f"Training for {num_epochs} epochs")

    # Per-batch scores over the whole run (returned to the caller)
    train_scores = defaultdict(list)
    val_scores = defaultdict(list)

    step_number = 0
    min_loss = np.inf

    # Weights that gave the best validation loss so far
    best_state = _snapshot_weights()

    for epoch in range(num_epochs):
        # Switch the model to training mode
        model.train()

        # Per-epoch scores, used for printing and for W&B logging
        running_train_scores = defaultdict(list)

        for train_x, train_y in tqdm(train_dataloader):
            # Clear the gradients accumulated by the previous step
            optimizer.zero_grad()

            pred_y = model(train_x.to(device))
            loss = criterion(pred_y, train_y.to(device))

            # Propagate gradients through the network and update the weights
            loss.backward()
            optimizer.step()

            loss_value = loss.item()
            train_scores["loss"].append(loss_value)
            running_train_scores["loss"].append(loss_value)

            step_number += 1

            # Decrease the learning rate after 4000 and 20000 steps
            if step_number in (4000, 20000):
                for param_group in optimizer.param_groups:
                    param_group["lr"] /= 10

        running_val_scores = defaultdict(list)
        # Switch the model to evaluation mode
        model.eval()

        with torch.no_grad():
            for i, (val_x, val_y) in enumerate(tqdm(val_dataloader)):
                if i == 0:
                    # Log a grid of the first three channels of the first batch.
                    # NOTE(review): the tensor is logged as-is; wrapping it in
                    # `wandb.Image` may be needed for an image panel - confirm.
                    img = make_grid(val_x[:, :3, ...])
                    wandb.log({"img": img})
                val_pred_y = model(val_x.to(device))
                val_loss = criterion(val_pred_y, val_y.to(device))

                val_loss_value = val_loss.item()
                val_scores["loss"].append(val_loss_value)
                running_val_scores["loss"].append(val_loss_value)

        # Summarise the epoch. Train and validation means get distinct keys so
        # one does not overwrite the other, and are logged unrounded.
        wandb_data = dict()
        train_output_strings = []
        for key, val in running_train_scores.items():
            mean_value = float(np.mean(val))
            wandb_data[f"train_{key}"] = mean_value
            train_output_strings.append(
                "{}: {}".format(key, round(mean_value, 5))
            )

        val_output_strings = []
        for key, val in running_val_scores.items():
            mean_value = float(np.mean(val))
            wandb_data[f"val_{key}"] = mean_value
            val_output_strings.append(
                "{}: {}".format(key, round(mean_value, 5))
            )
        print("TRAINING: {}".format(", ".join(train_output_strings)))
        print("VALIDATION: {}".format(", ".join(val_output_strings)))

        # Mean validation loss of this epoch; NaN when the validation split is
        # empty, in which case the comparison below is never true.
        epoch_val_losses = running_val_scores["loss"]
        if epoch_val_losses:
            epoch_val_loss = float(np.mean(epoch_val_losses))
        else:
            epoch_val_loss = float("nan")
        wandb_data['step'] = step_number
        wandb_data['epoch_val_loss'] = epoch_val_loss
        wandb.log(wandb_data)

        # Keep a copy of the weights whenever the validation loss improves
        if epoch_val_loss < min_loss:
            best_state = _snapshot_weights()
            min_loss = epoch_val_loss

    # Restore the best weights before returning
    model.load_state_dict(best_state)
    return train_scores, val_scores

### Evaluation
Now that we have a training function, let's write a function that evaluates the model on the training set and the test set. This function will iterate over the training and test datasets to compute the output for each sample in each set. Read the comments to understand what the function is doing.

In [None]:
def predict(model, train_data, test_data, batch_size):
    """Run `model` over the training and test sets and collect the outputs.

    Args:
        model: Trained network; it is switched to evaluation mode.
        train_data: 5-tuple of tensors (images, yields, locations, indices,
            years) with a common first dimension.
        test_data: 5-tuple with the same layout as `train_data`.
        batch_size: Batch size used for inference.

    Returns:
        A dict with, for each of the prefixes ``train`` and ``test``:
        ``<prefix>_pred`` and ``<prefix>_real`` (one value per sample),
        ``<prefix>_loc`` and ``<prefix>_indices`` (arrays with one row per
        sample) and ``<prefix>_years`` (list). ``test_pred`` and
        ``test_real`` are NumPy arrays; the ``train`` counterparts are lists.
        Keys of a split that yields no batches are absent.
    """
    results = defaultdict(list)

    # Switch to evaluation mode
    model.eval()

    with torch.no_grad():
        for prefix, data in (("train", train_data), ("test", test_data)):
            images_t, yields_t, locations_t, indices_t, years_t = data
            loader = DataLoader(
                TensorDataset(
                    images_t, yields_t, locations_t, indices_t, years_t
                ),
                batch_size=batch_size,
            )

            for batch_im, batch_yield, batch_loc, batch_idx, batch_year in tqdm(
                loader
            ):
                # Pass the batch to the model and compute the output
                pred = model(batch_im.to(device))

                # Save the results
                results[f"{prefix}_pred"].extend(pred.squeeze(1).tolist())
                results[f"{prefix}_real"].extend(
                    batch_yield.squeeze(1).tolist()
                )
                results[f"{prefix}_loc"].append(batch_loc.cpu().numpy())
                results[f"{prefix}_indices"].append(batch_idx.cpu().numpy())
                results[f"{prefix}_years"].extend(batch_year.tolist())

    # Locations and indices were stored one array per batch. The last batch is
    # usually smaller, so stacking with `np.array` would fail; join them along
    # the sample axis instead.
    for key in ("train_loc", "test_loc", "train_indices", "test_indices"):
        if key in results:
            results[key] = np.concatenate(results[key], axis=0)

    # Test predictions/targets are flat lists of floats.
    for key in ("test_pred", "test_real"):
        if key in results:
            results[key] = np.array(results[key])

    return results

## Running the Training and Evaluation
Let's say we want to predict the yield for the year 2012. What we should do is train on data available for the years 2003 - 2011 and evaluate the model on the data for the year 2012. That way we can ensure we have trained a causal model (a model that has predictive power without seeing future data).

In [None]:
def run_one_year(model, predict_year, time, train_steps, batch_size, lr):
    """Train on the years before `predict_year` and score on that year.

    Args:
        model: Network to train and evaluate.
        predict_year: Year used as the test set; earlier years form the
            training set.
        time: Number of leading entries kept along axis 2 of the images
            (presumably time steps - confirm against the data layout).
        train_steps: Forwarded to `train`.
        batch_size: Forwarded to `train` and `predict`.
        lr: Forwarded to `train`.

    Returns:
        Tuple ``(rmse, me)``: root-mean-square error and mean error
        (true minus predicted) on the test year.
    """

    def _as_tensors(image_array, sample_idx):
        # Images and yields go to `device`; the bookkeeping arrays do not.
        return (
            torch.as_tensor(
                image_array[:, :, :time, :], device=device
            ).float(),
            torch.as_tensor(yields[sample_idx], device=device).float().unsqueeze(1),
            torch.as_tensor(locations[sample_idx]),
            torch.as_tensor(indices[sample_idx]),
            torch.as_tensor(years[sample_idx]),
        )

    # Samples from the years preceding `predict_year` are the training data;
    # samples from `predict_year` itself are the evaluation (testing) data.
    past_idx = np.nonzero(years < predict_year)[0]
    target_idx = np.nonzero(years == predict_year)[0]

    centered_train, centered_test = normalize_images(
        images[past_idx], images[target_idx]
    )

    # Tuples of images, yields, locations, indices, and years
    train_data = _as_tensors(centered_train, past_idx)
    test_data = _as_tensors(centered_test, target_idx)

    # Train the model, then evaluate it on the train and test sets
    train_scores, val_scores = train(model, train_data, train_steps, batch_size, lr)
    results = predict(model, train_data, test_data, batch_size)

    model_information = {
        "state_dict": model.state_dict(),
        "val_loss": val_scores["loss"],
        "train_loss": train_scores["loss"],
        **results,
    }

    true = model_information["test_real"]
    pred = model_information["test_pred"]
    residual = true - pred

    # Compute the RMSE and ME
    rmse = np.sqrt(np.mean(residual ** 2))
    me = np.mean(residual)

    return rmse, me

## Running the Training/Evaluation
We will now use the following function that uses the functions above to run the entire process of training.

This function runs the training and evaluation of the model for each year in the variable `pred_years` defined above.

In [None]:
def run(model, batch_size=32, learning_rate=1e-3, weight_decay=0,
        train_steps=2500):
    """Train and evaluate `model` for every year in `pred_years`.

    One row per (year, time index) pair is written to ``results.csv`` with the
    columns ``year``, ``time_idx``, ``RMSE`` and ``ME``.

    Args:
        model: Network handed to `run_one_year`.
        batch_size: Forwarded to `run_one_year`.
        learning_rate: Forwarded to `run_one_year` as `lr`.
        weight_decay: Accepted but not used by this function.
        train_steps: Forwarded to `run_one_year`.
    """
    times = [32]
    columns = {"year": [], "time_idx": [], "RMSE": [], "ME": []}

    for pred_year, time_idx in itertools.product(pred_years, times):
        rmse, me = run_one_year(model, pred_year, time_idx, train_steps,
                                batch_size, learning_rate)
        columns["year"].append(pred_year)
        columns["time_idx"].append(time_idx)
        columns["RMSE"].append(rmse)
        columns["ME"].append(me)

    pd.DataFrame(data=columns).to_csv("results.csv")

## Question 7.2.2
In the above function there are some hyperparameters that need to be put in the config. Find them and put them in the `config` dictionary


In [None]:
# Build the network and move its parameters to the selected device.
model = ConvNet()
model.to(device)
# Last expression of the cell: displays the module structure in the notebook.
model

In [None]:
# Take the step budget from `config` (currently 25000) so the value that is
# logged with the W&B run is the one actually used.
run(model, train_steps=config['train_steps'])

In [None]:
!python train.py