In [1]:
import cv2
import numpy as np
import os
import natsort
import copy
import torch
import pandas as pd
import json

from torchvision import transforms, models
from torch.utils.data import DataLoader
from sklearn.model_selection import KFold
import torch.nn as nn

In [2]:
# Mount Google Drive at /content/gdrive/ (Colab only); every data, model and
# result path used below lives under this mount point.
from google.colab import drive 
drive.mount('/content/gdrive/')

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


In [3]:
from google.colab.patches import cv2_imshow

Data augmentation

In [38]:
def augment(roi, fname, size, out_dir=None):
    """Create randomly rotated/flipped copies of every image and save them.

    The input set is traversed ``size`` times. On each pass every image is
    rotated by a random multiple of 90 degrees around ``(w/2, h/2)`` and then,
    with probability 0.25 each, flipped horizontally, flipped vertically,
    flipped both ways, or left unflipped.

    Args:
        roi: Sequence of images (numpy arrays with at least two dimensions).
        fname: Sequence of file names, paired with ``roi`` by position.
        size: Number of passes; ``size * len(roi)`` images are produced.
        out_dir: Directory the augmented images are written to. Defaults to
            the notebook's Drive folder ``.../PartA/data/augment/``.

    Returns:
        Tuple ``(augmented_images, augmented_fnames)``. Each name is the
        original file name prefixed with ``'a<pass index>'``.
    """
    if out_dir is None:
        out_dir = '/content/gdrive/My Drive/Colab Notebooks/PartA/data/augment/'
    # makedirs also creates missing parents and tolerates an existing directory.
    os.makedirs(out_dir, exist_ok=True)

    augmented_images = []
    augmented_fnames = []
    for n in range(size):
        for src, f in zip(roi, fname):
            # Rotation by 0, 90, 180 or 270 degrees.
            angle = 90 * int(np.random.randint(4))
            h, w = src.shape[:2]
            rotation = cv2.getRotationMatrix2D((w/2, h/2), angle, 1)
            dst = cv2.warpAffine(src, rotation, (w, h), borderValue=[0, 0, 0, 0])

            # A single uniform draw selects one of four equally likely flips.
            q = np.random.rand()
            if q < 0.25:
                dst = cv2.flip(dst, 1)
            elif q < 0.50:
                dst = cv2.flip(dst, 0)
            elif q < 0.75:
                dst = cv2.flip(dst, 1)
                dst = cv2.flip(dst, 0)

            # Separate name so the loop variable is not overwritten.
            out_name = 'a{}'.format(n) + f
            augmented_images.append(dst)
            augmented_fnames.append(out_name)
            cv2.imwrite(os.path.join(out_dir, out_name), dst)

    return augmented_images, augmented_fnames

Prepare data - Extract ROI (Region of interest)

In [94]:
def prepare_data(mode='train', augmentation_size=8):
    """Build 4-channel (colour + depth) 256x256 crops from the dataset on Drive.

    The data folder is walked recursively. Files starting with ``RGB_`` and
    ``Debth_`` are collected (folders ending in ``ROI`` are skipped),
    natural-sorted by file name and paired by position. For each pair the
    depth image is converted to one channel, values above 1000 are zeroed and
    the result is rescaled to 0..255. A centred 768x768 window is cut from
    both images, the depth crop is appended as a fourth channel and the stack
    is resized to 256x256. Each result is written to ``<data folder>/ROI``
    under the RGB file name.

    Args:
        mode: ``'train'`` reads ``.../PartA/data/``, augments and prints
            progress; ``'evaluate'`` reads ``.../PartA/eval/`` silently and
            without augmentation.
        augmentation_size: Number of augmentation passes (train mode only).

    Returns:
        Tuple ``(images, fnames)``: the 4-channel images (originals first,
        then augmented copies) and their file names in the same order.

    Raises:
        ValueError: If ``mode`` is neither ``'train'`` nor ``'evaluate'``.
    """
    if mode == 'train':
        p = '/content/gdrive/My Drive/Colab Notebooks/PartA/data/'
        needAugment = True
        verbose = True
    elif mode == 'evaluate':
        p = '/content/gdrive/My Drive/Colab Notebooks/PartA/eval/'
        needAugment = False
        verbose = False
    else:
        # Previously an unknown mode surfaced later as an UnboundLocalError.
        raise ValueError("mode must be 'train' or 'evaluate', got {!r}".format(mode))

    input_rgb_path = []
    input_depth_path = []
    input_rgb_fname = []
    input_depth_fname = []
    lettuce_rgb_roi = []

    save_path = os.path.join(p, 'ROI')
    # cv2.imwrite does not create directories, so make sure the target exists.
    os.makedirs(save_path, exist_ok=True)

    ROI_SIZE = (768, 768)  # (width, height) of the centred crop
    OUTPUT_SIZE = (256, 256)

    for path, direct, files in os.walk(p):
        if path.endswith('ROI'):
            continue
        for f in files:
            if f.startswith('RGB_'):
                input_rgb_fname.append(f)
                input_rgb_path.append(os.path.join(path, f))
            # 'Debth_' is the prefix matched here; presumably it is the
            # dataset's own spelling, so it is left as is.
            elif f.startswith('Debth_'):
                input_depth_fname.append(f)
                input_depth_path.append(os.path.join(path, f))

    if verbose:
        print("Got {} RGB images".format(len(input_rgb_path)))
        print("Got {} depth images".format(len(input_depth_path)))

    if len(input_rgb_path) != len(input_depth_path):
        # RGB and depth files are paired purely by sorted position, so a
        # missing file shifts every later pair. zip() would hide this.
        import warnings
        warnings.warn("RGB/depth image counts differ ({} vs {}); pairs may be misaligned".format(
            len(input_rgb_path), len(input_depth_path)))

    rgb_idx = natsort.index_natsorted(input_rgb_fname)
    depth_idx = natsort.index_natsorted(input_depth_fname)

    input_rgb_path = natsort.order_by_index(input_rgb_path, rgb_idx)
    input_rgb_fname = natsort.order_by_index(input_rgb_fname, rgb_idx)
    input_depth_path = natsort.order_by_index(input_depth_path, depth_idx)
    input_depth_fname = natsort.order_by_index(input_depth_fname, depth_idx)

    half_w, half_h = ROI_SIZE[0] // 2, ROI_SIZE[1] // 2

    for fname, fname_d, savename in zip(input_rgb_path, input_depth_path, input_rgb_fname):
        # Read the colour image and its depth image together.
        src = cv2.imread(fname)
        # Same value as the original literal flag 3: colour + keep bit depth.
        src_d = cv2.imread(fname_d, cv2.IMREAD_COLOR | cv2.IMREAD_ANYDEPTH)
        # Collapse the depth image to a single channel.
        src_d = cv2.cvtColor(src_d, cv2.COLOR_BGR2GRAY)
        src_h, src_w = src.shape[:2]
        # THRESH_TOZERO_INV zeroes values above the threshold (1000).
        _, src_d = cv2.threshold(src_d, 1000, 255, cv2.THRESH_TOZERO_INV)

        src_d = np.interp(src_d, [0, src_d.max()], [0, 255]).astype('uint8')

        center_h, center_w = src_h // 2, src_w // 2
        rows = slice(center_h - half_h, center_h + half_h)
        cols = slice(center_w - half_w, center_w + half_w)
        roi = src[rows, cols]
        roi_d = src_d[rows, cols]
        # Stretch the cropped depth values to the full 0..255 range again.
        roi_d = np.interp(roi_d, [0, roi_d.max()], [0, 255]).astype('uint8')
        # Raises if the source image is smaller than the crop window.
        roi_d = roi_d.reshape((ROI_SIZE[1], ROI_SIZE[0], 1))

        # Colour channels followed by depth -> 4-channel image.
        roi_m = np.concatenate((roi, roi_d), axis=2)
        roi_m = cv2.resize(roi_m, OUTPUT_SIZE)

        lettuce_rgb_roi.append(roi_m)
        cv2.imwrite(os.path.join(save_path, savename), roi_m)

    lettuce_rgb_fnames = copy.copy(input_rgb_fname)

    if needAugment:
        augmented_images, augmented_fnames = augment(lettuce_rgb_roi, lettuce_rgb_fnames, augmentation_size)
        lettuce_rgb_roi.extend(augmented_images)
        lettuce_rgb_fnames.extend(augmented_fnames)

    if verbose:
        print("Image prepared! ")
        print("Processed {} images".format(len(lettuce_rgb_roi)))
        # Preview the last processed image; skipped when nothing was found
        # because src/roi/roi_m would not exist.
        if lettuce_rgb_roi:
            cv2_imshow(src)
            cv2_imshow(roi)
            cv2_imshow(roi_m)

    return lettuce_rgb_roi, lettuce_rgb_fnames

In [109]:
# Training set: original crops plus 8 augmentation passes (default mode is 'train').
input_images, input_fnames = prepare_data(augmentation_size=8)
# Evaluation set: crops only, no augmentation.
test_images, test_fnames = prepare_data(mode='evaluate')


Output hidden; open in https://colab.research.google.com to view.

Define custom dataset

In [110]:
IMG_SIZE = (256, 256)

class CustomDataset(torch.utils.data.Dataset):
    """Dataset over ``(image, file name)`` pairs with one label per sample.

    Args:
        image: Indexable of ``(image_array, file_name)`` pairs.
        label: Indexable of labels, aligned with ``image`` by position.
        transform: Stored on the instance; not applied by this class.
        img_size: ``(height, width)`` every image must have when collated.
            Defaults to ``IMG_SIZE``.
    """

    def __init__(self, image, label, transform=None, img_size=IMG_SIZE):
        self.transform = transform
        self.image = image
        self.label = label
        self.img_size = tuple(img_size)

    def __len__(self):
        """Return the number of samples."""
        return len(self.image)

    def __getitem__(self, index):
        """Return ``[image, label, file_name]`` for the sample at ``index``."""
        # Locals instead of attributes: the lookup must not leave per-sample
        # state behind on the dataset object.
        entry = self.image[index]
        return [entry[0], self.label[index], entry[1]]

    def custom_collate_fn(self, data):
        """Collate ``[image, label, file_name]`` samples into a batch dict.

        Images become channel-first ``float32`` arrays. A 2-D image is treated
        as single-channel. An image whose height/width does not match
        ``self.img_size`` makes ``reshape`` raise.

        Returns:
            Dict with keys ``'input'``, ``'label'`` and ``'fname'``, each a
            list with one entry per sample.
        """
        inputImages = []
        outputVectors = []
        fileNames = []
        h, w = self.img_size
        for sample in data:
            img = sample[0]

            if img.ndim == 2:
                img = img[:, :, np.newaxis]

            # The channel count comes from the image itself rather than a
            # hard-coded 4, so the single-channel branch above can succeed.
            channels = img.shape[-1]
            inputImages.append(img.reshape((h, w, channels)).transpose((2, 0, 1)).astype(np.float32))
            outputVectors.append(sample[1])
            fileNames.append(sample[2])

        return {'input': inputImages, 'label': outputVectors, 'fname': fileNames}


class ToTensor(object):
  """Convert a batch dict of channel-last numpy images and labels to tensors.

  Expects ``data`` with keys ``'label'``, ``'input'`` and ``'fname'``. Each
  entry of ``data['input']`` is a ``(height, width, channels)`` array and is
  returned channel-first as ``float32``. The caller's containers are not
  modified.

  Returns:
    Dict with ``'label'`` (``float32`` tensor, one row per sample),
    ``'input'`` (``float32`` tensor of shape ``(N, C, H, W)``) and
    ``'fname'`` (passed through unchanged).
  """

  def __call__(self, data):
    labels, images, fnames = data['label'], data['input'], data['fname']

    if len(images) == 0:
      # Nothing to infer shapes from; keep the shapes used for an empty batch.
      h, w = IMG_SIZE
      return {'label': torch.empty(0, 5), 'input': torch.empty(0, 4, h, w), 'fname': fnames}

    # Build new arrays rather than writing back into the caller's list.
    image_tensors = [
        torch.from_numpy(np.asarray(img).transpose((2, 0, 1)).astype(np.float32))
        for img in images
    ]
    label_tensors = [torch.from_numpy(np.asarray(lab, dtype=np.float32)) for lab in labels]

    return {
        'label': torch.stack(label_tensors),
        'input': torch.stack(image_tensors),
        'fname': fnames,
    }


Load data

In [111]:
# Turn the lists returned by prepare_data into arrays so they can later be
# indexed with arrays of fold indices.
input_images = np.array(input_images)
input_fnames = np.array(input_fnames)

# Ground-truth measurements per image; the CSV's unnamed first column
# ('Image<N>') becomes the index.
target_df = pd.read_csv('/content/gdrive/My Drive/Colab Notebooks/PartA/results/output_df.csv', index_col='Unnamed: 0')
target_df

Unnamed: 0,shoot_fw,shoot_dw,height,diameter,leafarea
Image27,5.5,0.42,9.0,11.0,153.9
Image79,30.3,1.92,8.8,20.2,582.3
Image140,60.9,2.83,11.6,19.0,960.6
Image203,112.0,5.76,11.0,22.0,1614.9
Image292,372.6,14.17,17.0,32.0,3839.6
...,...,...,...,...,...
Image20,4.5,0.40,8.5,13.0,127.4
Image31,6.6,0.58,7.5,14.6,159.3
Image197,127.0,6.30,12.0,21.0,2110.5
Image246,224.1,8.61,15.0,24.5,3777.1


Normalize the target values to the range [0, 1] (min-max scaling per column)

In [112]:
# Per-column extrema of the targets. Both arrays are kept because the
# predictions are mapped back to original units with them later on.
min_values = target_df.min().to_numpy()
max_values = target_df.max().to_numpy()
# Min-max scaling: every column ends up in [0, 1].
target_df = target_df.sub(min_values).div(max_values - min_values)

Select the rows of target_df that correspond to the input images

In [113]:
# Map each file name to its label key: the text between the first '_' and the
# following '.' is appended to 'Image' (e.g. 'RGB_27.png' -> 'Image27').
# Augmented names such as 'a0RGB_27.png' map to the same key as their source.
img_idx = ['Image'+ s.split("_")[1].split(".")[0] for s in input_fnames]
# One label row per input image, in the current order of input_fnames
# (rows repeat for augmented copies).
target_df = target_df.loc[img_idx, :]

Process input images and output labels

In [114]:
# target_df holds one row per entry of input_fnames, in the same order.
# Every reordering must therefore be applied to names, images and label rows
# alike, or the labels end up attached to the wrong images.
if len(target_df) != len(input_fnames):
    raise ValueError("target_df has {} rows but there are {} input images".format(
        len(target_df), len(input_fnames)))

idx = natsort.index_natsorted(input_fnames)
input_fnames = np.array(natsort.order_by_index(input_fnames, idx))
input_images = np.array(natsort.order_by_index(input_images, idx))
# Positional reordering with the same permutation. Storing the sorted frame
# keeps the cell idempotent: on a re-run idx is the identity.
target_df = target_df.iloc[idx]
output_labels = target_df.values

transform = transforms.Compose([ToTensor()])

In [115]:
# Training hyper-parameters.
batch_size = 64
epochs = 20
lr = 0.0001  # learning rate passed to the optimizer

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Current device:', device)

Current device: cuda


In [116]:
# Five-fold cross-validation splitter (default settings apart from the fold count).
kf = KFold(n_splits=5)

# Fold counter used by the training cell.
fold_idx = 0

In [None]:
# Train a 4-channel ResNet-18 regressor per cross-validation fold, keeping the
# weights with the lowest validation loss for each fold.
#
# NOTE(review): input_images contains augmented copies of the originals and
# kf splits by position, so rotated/flipped variants of a validation image can
# sit in the training split. The validation loss is then optimistic. Consider
# splitting by source image (e.g. sklearn's GroupKFold) - not changed here.

# Only the first fold is trained (the original code always broke out after
# one fold). Raise this to run more folds.
MAX_FOLDS = 1

# The counter comes from enumerate, so re-running the cell always starts at 1.
for fold_idx, (train_idx, val_idx) in enumerate(kf.split(output_labels), 1):
  image_train, image_val = input_images[train_idx], input_images[val_idx]
  label_train, label_val = output_labels[train_idx], output_labels[val_idx]

  fname_train, fname_val = input_fnames[train_idx], input_fnames[val_idx]

  # CustomDataset expects (image, file name) pairs.
  image_train = list(zip(image_train, fname_train))
  image_val = list(zip(image_val, fname_val))

  # Epochs since the last improvement. NOTE(review): tracked but never used
  # to stop training.
  earlyStoppingCount = 0
  dataset_train = CustomDataset(image_train, label_train)
  loader_train = DataLoader(dataset_train, batch_size=batch_size, shuffle=True, collate_fn=dataset_train.custom_collate_fn, num_workers=0)
  dataset_val = CustomDataset(image_val, label_val)
  # The whole validation split is one batch, so shuffling has no effect on the loss.
  loader_val = DataLoader(dataset_val, batch_size=len(val_idx), shuffle=False, collate_fn=dataset_val.custom_collate_fn, num_workers=0)

  # define model: pretrained ResNet-18 with a fresh 4-channel first layer and
  # a 5-output regression head
  model = models.resnet18(pretrained=True)
  model.conv1 = nn.Conv2d(4, 64, kernel_size=7, stride=2, padding=3, bias=False)
  model.fc = nn.Linear(512, 5, bias=True)

  model.to(device)
  criterion = nn.MSELoss().to(device)
  optim = torch.optim.Adam(model.parameters(), lr=lr)
  best_epoch = 0
  val_loss_save = np.inf

  ckpt_path = '/content/gdrive/My Drive/Colab Notebooks/PartA/models/param{}.data'.format(fold_idx)
  # torch.save does not create missing directories.
  os.makedirs(os.path.dirname(ckpt_path), exist_ok=True)
  checkpoint_saved = False

  for epoch in range(epochs):
    model.train()
    train_loss = []

    for data in loader_train:
      # Stack the per-sample arrays once; torch.tensor() on a list of
      # ndarrays converts element by element and is very slow.
      batch_label = torch.from_numpy(np.asarray(data['label'], dtype=np.float32)).to(device)
      batch_input = torch.from_numpy(np.asarray(data['input'], dtype=np.float32)).to(device)
      output = model(batch_input)

      optim.zero_grad()

      loss = criterion(output, batch_label)
      loss.backward()

      optim.step()

      train_loss.append(loss.item())

    model.eval()
    val_loss = []
    with torch.no_grad():
      for data in loader_val:
        val_target = torch.from_numpy(np.asarray(data['label'], dtype=np.float32)).to(device)
        val_input = torch.from_numpy(np.asarray(data['input'], dtype=np.float32)).to(device)
        val_output = model(val_input)
        val_loss.append(criterion(val_output, val_target).item())

    val_loss_tmp = np.mean(val_loss)
    earlyStoppingCount += 1

    if val_loss_tmp < val_loss_save:
      earlyStoppingCount = 0
      # 1-based, matching the epoch numbers printed below.
      best_epoch = epoch + 1
      val_loss_save = val_loss_tmp.item()
      torch.save(model.state_dict(), ckpt_path)
      checkpoint_saved = True
      print(".......model updated (epoch = ", epoch+1, ")")
    print("epoch: %04d / %04d | train loss: %.5f | validation loss: %.5f" % (epoch+1, epochs, np.mean(train_loss), np.mean(val_loss)))

  # Leave `model` holding the best weights, not the last epoch's, so later
  # cells evaluate the checkpoint that was reported as best.
  if checkpoint_saved:
    model.load_state_dict(torch.load(ckpt_path, map_location=device))
    model.eval()

  print("Model with the best validation accuracy is saved.")
  print("Best epoch: ", best_epoch)
  print("Best validation loss: {:.5f}".format(val_loss_save))
  print("Done.")

  if fold_idx >= MAX_FOLDS:
    break
   

.......model updated (epoch =  1 )
epoch: 0001 / 0020 | train loss: 0.06235 | validation loss: 0.03498
.......model updated (epoch =  2 )
epoch: 0002 / 0020 | train loss: 0.01374 | validation loss: 0.01044
.......model updated (epoch =  3 )
epoch: 0003 / 0020 | train loss: 0.00618 | validation loss: 0.00886
.......model updated (epoch =  4 )
epoch: 0004 / 0020 | train loss: 0.00359 | validation loss: 0.00593
epoch: 0005 / 0020 | train loss: 0.00282 | validation loss: 0.00609


In [None]:
image_test = test_images
fname_test = test_fnames

# Channel-last (N, H, W, C) -> channel-first (N, C, H, W), as the model expects.
image_t = np.array(image_test).transpose((0, 3, 1, 2))
input_test = torch.tensor(image_t, dtype=torch.float32).to(device)

# Inference only: switch to evaluation mode explicitly and skip gradient
# tracking so no autograd graph is built for the whole test batch.
model.eval()
with torch.no_grad():
    res = model(input_test).cpu().numpy()

# Undo the min-max scaling applied to the training targets.
# NOTE(review): this needs max_values/min_values as computed from the raw
# targets; re-running the normalisation cell on scaled data would change them.
res = (max_values - min_values) * res + min_values
res



In [None]:
# Load the evaluation ground truth and arrange it like the prediction table:
# one row per test image, one column per trait.
FILE_DIRECTORY = '/content/gdrive/My Drive/Colab Notebooks/PartA/eval/GroundTruth.json'
with open(FILE_DIRECTORY) as data_file:
    JSON_data = json.load(data_file)

measurements = JSON_data['Measurements']
image_index = np.array(list(measurements.keys()))

# One list per trait, in the JSON's key order. Comprehensions are used instead
# of a loop over `_`, which would overwrite IPython's last-output variable.
shoot_fw = [entry['FreshWeightShoot'] for entry in measurements.values()]
shoot_dw = [entry['DryWeightShoot'] for entry in measurements.values()]
height = [entry['Height'] for entry in measurements.values()]
diameter = [entry['Diameter'] for entry in measurements.values()]
leafarea = [entry['LeafArea'] for entry in measurements.values()]

# Label key per test file name, e.g. 'RGB_27.png' -> 'Image27'.
img_idx = ['Image'+ s.split("_")[1].split(".")[0] for s in fname_test]

output_df = pd.DataFrame(np.array([shoot_fw, shoot_dw, height, diameter, leafarea]).T, index=image_index, columns=['shoot_fw', 'shoot_dw', 'height', 'diameter', 'leafarea'])

# Keep only the test images, in the order of fname_test.
output_df = output_df.loc[img_idx, :]


In [None]:
# Ground-truth table for the test images.
output_df

In [None]:

# Wrap the predictions in a frame with the same columns and index labels as
# output_df so the two can be compared element-wise.
res_df = pd.DataFrame(res, columns=['shoot_fw', 'shoot_dw', 'height', 'diameter', 'leafarea'], index=img_idx)
res_df

In [None]:
# Normalised mean squared error per trait:
#   sum((truth - prediction)^2) / sum(truth^2)
n1 = (output_df - res_df) ** 2
n2 = output_df ** 2
# Reduce over rows explicitly. np.sum(DataFrame) relies on pandas' axis=None
# behaviour, which newer pandas versions change to a reduction over both axes.
nmse = n1.sum(axis=0) / n2.sum(axis=0)
print(nmse)
# Overall score: sum of the per-trait NMSE values.
print(nmse.sum())