<a href="https://colab.research.google.com/github/plant-ai-biophysics-lab/DeformableCNN-PlantTraits/blob/main/example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%capture
!git clone https://github.com/plant-ai-biophysics-lab/DeformableCNN-PlantTraits.git

In [2]:
import os
os.chdir('/content/DeformableCNN-PlantTraits')

In [3]:
%%capture
!pip install albumentations==1.1.0
!pip install agml


# Training and Evaluation Pipeline

### Data and config setup

Import libraries

In [4]:
import os
import time
import torch, torchvision
import numpy as np
import torch.nn as nn
from torch.functional import split
from torch.utils.data import DataLoader
from torch.optim import lr_scheduler
from sklearn.model_selection import train_test_split, StratifiedKFold

from torch.utils.tensorboard import SummaryWriter

from datatools import *
from engine import train_single_epoch, validate
from loss import NMSELoss
from architecture import GreenhouseMidFusionRegressor

Download 2021 Autonomous Greenhouse Challenge dataset

In [5]:
import agml
loader = agml.data.AgMLDataLoader('autonomous_greenhouse_regression', dataset_path = './')

Downloading autonomous_greenhouse_regression (size = 887.2 MB): 887226368it [00:50, 17579122.57it/s]                               


[AgML Download]: Extracting files for autonomous_greenhouse_regression... Done!

You have just downloaded [1mautonomous_greenhouse_regression[0m.

This dataset is licensed under the [1mCC BY-SA 4.0[0m license.
To learn more, visit: https://creativecommons.org/licenses/by-sa/4.0/

When using this dataset, please cite the following:

@misc{https://doi.org/10.4121/15023088.v1,
  doi = {10.4121/15023088.V1},
  url = {https://data.4tu.nl/articles/_/15023088/1},
  author = {Hemming,  S. (Silke) and de Zwart,  H.F. (Feije) and Elings,  A. (Anne) and bijlaard,  monique and Marrewijk,  van,  Bart and Petropoulou,  Anna},
  keywords = {Horticultural Crops,  Mechanical Engineering,  FOS: Mechanical engineering,  Artificial Intelligence and Image Processing,  FOS: Computer and information sciences,  Horticultural Production,  FOS: Agriculture,  forestry and fisheries,  Autonomous Greenhouse Challenge,  autonomous greenhouse,  Artificial Intelligence,  image processing,  computer vision,  Horti

AttributeError: Received invalid info parameter: 'num_to_class'.

Define data and output directories

In [6]:
sav_dir='model_weights/'
if not os.path.exists(sav_dir):
    os.mkdir(sav_dir)
# Comment these two lines and uncomment the next two if you've already croppped the images to another directory
RGB_Data_Dir   = './autonomous_greenhouse_regression/images/'
Depth_Data_Dir = './autonomous_greenhouse_regression/depth_images/'


# RGB_Data_Dir='./autonomous_greenhouse_regression/cropped_images/'
# Depth_Data_Dir='./autonomous_greenhouse_regression/cropped_depth_images/'


JSON_Files_Dir = './autonomous_greenhouse_regression/annotations.json'

Crop the data if necessary (if you did this beforehand or you don't need to crop don't run)

In [7]:
# import matplotlib.pyplot as plt
import cv2
min_x=650
max_x=1450
min_y=200
max_y=900
cropped_img_dir='./autonomous_greenhouse_regression/cropped_images/'

cropped_depth_img_dir='./autonomous_greenhouse_regression/cropped_depth_images/'

if not os.path.exists(cropped_img_dir):
    os.mkdir(cropped_img_dir)

if not os.path.exists(cropped_depth_img_dir):
    os.mkdir(cropped_depth_img_dir)

for im in os.listdir(RGB_Data_Dir):
    img = cv2.imread(RGB_Data_Dir+im)
    crop_img = img[min_y:max_y,min_x:max_x]
    cv2.imwrite(cropped_img_dir+im, crop_img)

for depth_im in os.listdir(Depth_Data_Dir):
    depth_img = cv2.imread(Depth_Data_Dir+depth_im, 0)
    crop_depth_img = depth_img[min_y:max_y,min_x:max_x]
    cv2.imwrite(cropped_depth_img_dir+depth_im, crop_depth_img)

RGB_Data_Dir   = cropped_img_dir
Depth_Data_Dir = cropped_depth_img_dir

Set model architectures options:
- single vs. multi input (SI- or MI-)
- single vs. multi output (-SO or -MO)
- deformable vs. standard convolutions

In [8]:
ConvType = 'deformable' # 'standard'

training_category = 'MIMO' #'MIMO', 'MISO', 'SIMO', 'SISO'

# Multi-input, multi-output model
if training_category   == 'MIMO':
    inputs = ['RGB-D']
    outputs = ['ALL']
    NumOutputs = None

# Multi-input, single-output model
elif training_category == 'MISO':
    inputs = ['RGB-D']
    outputs = ['FreshWeightShoot','DryWeightShoot','Height','Diameter','LeafArea']
    NumOutputs = 1

# Single-input, multi-output model
elif training_category == 'SIMO':
    inputs = ['RGB','D']
    outputs = ['ALL']
    NumOutputs = None

# Single-input, single-output model
elif training_category == 'SISO':
    inputs = ['RGB','D']
    outputs = ['FreshWeightShoot','DryWeightShoot','Height','Diameter','LeafArea']
    NumOutputs = 1

Set other model config parameters

In [10]:
split_seed = 12
num_epochs = 10

Create PyTorch dataset, create PyTorch dataloader, and split train/val/test

In [11]:
# Instantiate the PyTorch datalaoder the autonomous greenhouse dataset.
dataset = GreenhouseDataset(rgb_dir = RGB_Data_Dir,
                            d_dir = Depth_Data_Dir,
                            jsonfile_dir = JSON_Files_Dir,
                            transforms = get_transforms(train=False, means=[0,0,0,0],stds=[1,1,1,1]))
if NumOutputs !=1:
    NumOutputs=dataset.num_outputs

# Remove last 50 images from training/validation set. These are the test set.
dataset.df= dataset.df.iloc[:-50]

# Split train and validation set. Stratify based on variety.
train_split, val_split = train_test_split(dataset.df,
                                          test_size = 0.2,
                                          random_state = split_seed,
                                          stratify = dataset.df['outputs'].str['classification']) #change to None if you don't have class info
train = torch.utils.data.Subset(dataset, train_split.index.tolist())
val   = torch.utils.data.Subset(dataset, val_split.index.tolist())

# Create train and validation dataloaders
train_loader = torch.utils.data.DataLoader(train, batch_size=6, num_workers=6, shuffle=True)
val_loader   = torch.utils.data.DataLoader(val,   batch_size=6, shuffle=False, num_workers=6)




Determine the mean and standard deviation of images for normalization (Only need to do once for a new dataset)

In [12]:
# this part is just to check the MEAN and STD of the dataset (dont run unless you need mu and sigma)

nimages = 0
mean = 0.
std = 0.
dataloader = torch.utils.data.DataLoader(dataset, batch_size=5, shuffle=False, num_workers=12)
dataset.input = 'RGB-D'
dataset.out = 'ALL'
for batch, _ in dataloader:

    # Rearrange batch to be the shape of [B, C, W * H]
    batch = batch.view(batch.size(0), batch.size(1), -1)
    # Update total number of images
    nimages += batch.size(0)
    # Compute mean and std here
    mean += batch.mean(2).sum(0)
    std += batch.std(2).sum(0)

# Final step
mean /= nimages
std /= nimages

print('Mean: '+ str(mean))
print('Standard Deviation', str(std))




Mean: tensor([0.5482, 0.4620, 0.3602, 0.0127])
Standard Deviation tensor([0.1639, 0.1761, 0.2659, 0.0035])


Copy the output of the previous cells into here to avoid needing to redetermine mean and std every time

In [13]:
dataset.means=[0.5482, 0.4620, 0.3602, 0.0127]  #these values were copied from the previous cell
dataset.stds=[0.1639, 0.1761, 0.2659, 0.0035]   #copy and paste the values to avoid having
                                                # to rerun the previous cell for every iteration

Define the loss function as Normalized Mean Squared Error, as required for the 2021 Autonomous Greenhouse Challenge

In [14]:
criterion = NMSELoss()

### Training

Define the training loop and fit the model.

In [18]:
# Training loop
device = torch.device('cuda')

for input in inputs:
    for output in outputs:
        dataset.input = input
        dataset.out = output
        model = GreenhouseMidFusionRegressor(input_data_type = input, num_outputs = NumOutputs, conv_type = ConvType)
        model.to(device)
        params = [p for p in model.parameters() if p.requires_grad]

        optimizer = torch.optim.Adam(params,
                                     lr=0.0005,
                                     betas=(0.9, 0.999),
                                     eps=1e-08,
                                     weight_decay = 0,
                                     amsgrad = False)  # select an optimzer for each run


        best_val_loss = 9999999 # initial dummy value
        current_val_loss = 0
        # training_val_loss=0

        writer = SummaryWriter()
        start = time.time()

        for epoch in range(num_epochs):
            with open('run.txt', 'a') as f:
                f.write('\n')
                f.write('Epoch: '+ str(epoch + 1) + ', Time Elapsed: '+ str((time.time()-start)/60) + ' mins')
            print('Epoch: ', str(epoch + 1), ', Time Elapsed: ', str((time.time()-start)/60), ' mins')

            train_single_epoch(model, dataset, device, criterion, optimizer, writer, epoch, train_loader)

            best_val_loss = validate(model, dataset, device, training_category, sav_dir, criterion, writer, epoch, val_loader, best_val_loss)

Epoch:  1 , Time Elapsed:  2.7060508728027343e-06  mins




Train NMSE:  4.970419406890869
Train NMSE:  4.60944938659668
Train NMSE:  3.8320040702819824
Train NMSE:  3.3588671684265137
Train NMSE:  3.020707607269287
Train NMSE:  5.797316074371338
Train NMSE:  3.4509828090667725
Train NMSE:  3.4639430046081543
Train NMSE:  2.705380916595459
Train NMSE:  2.9257187843322754
Train NMSE:  2.9639089107513428
Train NMSE:  2.983748197555542
Train NMSE:  2.9683423042297363
Train NMSE:  2.691638231277466
Train NMSE:  2.46527361869812
Train NMSE:  2.965986490249634
Train NMSE:  2.4960169792175293
Train NMSE:  2.2979843616485596
Train NMSE:  2.23479962348938
Train NMSE:  2.0221686363220215
Train NMSE:  2.0449557304382324
Train NMSE:  3.023552179336548
Train NMSE:  2.1492156982421875
Train NMSE:  2.443488597869873
Train NMSE:  2.3152503967285156
Train NMSE:  2.3950610160827637
Train NMSE:  2.1690661907196045
Train NMSE:  2.2644739151000977
Train NMSE:  2.014524459838867
Train NMSE:  2.09029221534729
Train NMSE:  2.442634105682373
Train NMSE:  2.114592790603

### Evaluation

Define the test dataset

In [19]:
# Instantiate the PyTorch datalaoder the autonomous greenhouse dataset.
testset = GreenhouseDataset(rgb_dir = RGB_Data_Dir,
                            d_dir = Depth_Data_Dir,
                            jsonfile_dir = JSON_Files_Dir,
                            transforms = get_transforms(train=False, means=dataset.means, stds=dataset.stds))

# Grab last 50 images as test dataset
testset.df = testset.df[-50:]

# Get testset_size
testset_size = testset.df.shape[0]

# Create test dataloader
test_loader = torch.utils.data.DataLoader(testset,
                                          batch_size = 50,
                                          num_workers = 0,
                                          shuffle = False)

Define loss functions for model evaluation

In [20]:
cri = NMSELoss()
mse = nn.MSELoss()

Run the evaluation Loop

In [21]:
# Evaluation loop
device=torch.device('cuda')

with torch.no_grad():
    for input in inputs:
        final = torch.zeros((testset_size,0))
        all_targets = torch.zeros((testset_size,0))
        for output in outputs:
            print('Input is ', input)
            testset.input = input
            testset.out = output

            device=torch.device('cuda')
            model= GreenhouseMidFusionRegressor(input_data_type = input,
                                                num_outputs = NumOutputs,
                                                conv_type = ConvType)
            model.to(device)
            model.load_state_dict(torch.load(sav_dir + 'bestmodel' + training_category + '_' + input + '_' + output + '.pth'))
            model.eval()


            if output=='All':
                ap=torch.zeros((0,5))
                at=torch.zeros((0,5))
            else:
                ap=torch.zeros((0,1))
                at=torch.zeros((0,1))

            for rgbd, targets in test_loader:
                rgbd = rgbd.to(device)
                targets = targets.to(device)
                preds = model(rgbd)
                # mse_loss=mse(preds, targets)
                # nmse=criterion(preds, targets)
                # nmse, pred=cri(preds, targets)
                ap=torch.cat((ap, preds.detach().cpu()), 0)
                at=torch.cat((at, targets.detach().cpu()), 0)

            if output=='All':
                print('FW MSE: ', str(mse(ap[:,0],at[:,0]).tolist()))
                print('DW MSE: ', str(mse(ap[:,1],at[:,1]).tolist()))
                print('H MSE: ', str(mse(ap[:,2],at[:,2]).tolist()))
                print('D MSE: ', str(mse(ap[:,3],at[:,3]).tolist()))
                print('LA MSE: ', str(mse(ap[:,4],at[:,4]).tolist()))
            else:
                final=torch.cat((final, ap.detach().cpu()),1)
                all_targets=torch.cat((all_targets, at.detach().cpu()),1)
                print(output,' MSE: ', str(mse(ap,at).tolist()))

        if output == 'All':
            print('Overall NMSE: ', str(cri(ap,at).tolist()))
        else:
            print('Overall NMSE: ', str(cri(final,all_targets).tolist()))

Input is  RGB-D


  model.load_state_dict(torch.load(sav_dir + 'bestmodel' + training_category + '_' + input + '_' + output + '.pth'))


OutOfMemoryError: CUDA out of memory. Tried to allocate 1.67 GiB. GPU 0 has a total capacity of 14.75 GiB of which 889.06 MiB is free. Process 10715 has 13.88 GiB memory in use. Of the allocated memory 10.74 GiB is allocated by PyTorch, and 3.00 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)