In [48]:
import cv2
import random
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import numpy as np 
import joblib
from torch.utils.data import DataLoader
from preprocessing import *
from utils import *
from datasets import *
from CNN_AE_helper import *
from CNN3d import *
from torchvision.transforms import v2
from scipy.ndimage import binary_erosion
import os

def filter_filenames_cropping(folder_path, camera_id, date_stamps, tray_ids):
    """
    Recursively collect the files under ``folder_path`` that match a camera,
    a date stamp and a tray ID.

    A file name is kept when all of the following hold:

    * it starts with ``camera_id + "_"``;
    * it contains at least one entry of ``date_stamps`` as a substring;
    * its third ``"_"``-separated field starts with at least one entry of
      ``tray_ids``.

    Names with fewer than three ``"_"``-separated fields are skipped instead of
    raising ``IndexError``.

    Args:
        folder_path: Root directory walked with ``os.walk``.
        camera_id: Camera identifier expected at the start of the file name.
        date_stamps: Iterable of date-stamp strings.
        tray_ids: Iterable of tray-ID prefixes.

    Returns:
        list[str]: Matching paths (``os.path.join(root, name)``), in the order
        ``os.walk`` yields them.
    """
    camera_prefix = camera_id + "_"
    # Materialise once so one-shot iterables (generators) are not exhausted
    # after the first file name has been checked.
    date_stamps = tuple(date_stamps)
    tray_prefixes = tuple(tray_ids)

    filtered_files = []

    for root, _dirs, files in os.walk(folder_path):
        for f in files:
            if not f.startswith(camera_prefix):
                continue
            if not any(date in f for date in date_stamps):
                continue

            fields = f.split("_")
            if len(fields) < 3:
                # No tray field present, so the name cannot match any tray ID.
                continue

            # str.startswith accepts a tuple and returns False for an empty one.
            if fields[2].startswith(tray_prefixes):
                filtered_files.append(os.path.join(root, f))

    return filtered_files

# FX10 camera: root folder that is searched recursively for the image files.
# Previous Windows location, kept for reference:
#IMG_DIR = 'C:/Users/leonw/OneDrive - KU Leuven/Master Thesis/Data_cropped/cropped_hdf5/'
IMG_DIR = '/home/u0158953/data/Strawberries/PotsprocessedData/HDF5_FILES'
CAMERA = 'FX10'

# Healthy leaves: date stamps and tray-ID prefixes used to select files by name.
DATES = ['07SEPT2023', '08SEPT2023', '09SEPT2023', '10SEPT2023', '11SEPT2023', '12SEPT2023',
         '13SEPT2023', '14SEPT2023', '15SEPT2023', '18SEPT2023', '19SEPT2023']
TRAYS = ['2A', '2B']    # Some files from the FX17 camera are mistakenly named in 2D instead of 4D
healthy_FX10 = filter_filenames_cropping(folder_path=IMG_DIR, camera_id=CAMERA, date_stamps=DATES, tray_ids=TRAYS)

# For now these settings only apply to the HSI data, but eventually that is all we will need.
INPUT_DATA = healthy_FX10  # slice (e.g. [0:40]) to speed things up while testing
# Previous Windows location of the masks, kept for reference:
#MASK_FOLDER = 'C:/Users/leonw/OneDrive - KU Leuven/Master Thesis/Data_cropped/cropped_masks'
# Root folder of the leaf masks; save_cropped_leaves reads them from
# MASK_FOLDER/CAMERA/<image file name with a .png extension>.
MASK_FOLDER = "/home/u0158953/data/Strawberries/PotsprocessedData/MASKS"

image_paths = INPUT_DATA
# Number of files selected by the filter.
print(len(image_paths))

108


In [57]:
# Load the first selected image; with return_wlens=True, LoadHSI returns a second
# value that is kept here as `wlen` (the wavelength array shown in the next cell).
hsi_np, wlen = LoadHSI(image_paths[0], return_wlens=True)

In [58]:
# Display the wavelength values that were loaded together with the cube.
wlen

array([ 398.75,  401.3 ,  403.84,  406.39,  408.94,  411.5 ,  414.06,
        416.62,  419.18,  421.74,  424.31,  426.88,  429.46,  432.03,
        434.61,  437.19,  439.78,  442.36,  444.95,  447.54,  450.14,
        452.73,  455.33,  457.93,  460.53,  463.14,  465.74,  468.35,
        470.96,  473.58,  476.19,  478.81,  481.43,  484.05,  486.68,
        489.3 ,  491.93,  494.56,  497.19,  499.83,  502.46,  505.1 ,
        507.74,  510.38,  513.02,  515.67,  518.31,  520.96,  523.61,
        526.26,  528.92,  531.57,  534.23,  536.89,  539.55,  542.21,
        544.87,  547.54,  550.2 ,  552.87,  555.54,  558.21,  560.88,
        563.56,  566.23,  568.91,  571.59,  574.26,  576.95,  579.63,
        582.31,  584.99,  587.68,  590.37,  593.06,  595.74,  598.44,
        601.13,  603.82,  606.51,  609.21,  611.91,  614.6 ,  617.3 ,
        620.  ,  622.7 ,  625.4 ,  628.11,  630.81,  633.52,  636.22,
        638.93,  641.64,  644.35,  647.06,  649.77,  652.48,  655.19,
        657.91,  660

In [59]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import label
import h5py
import os
# saving numpy arrays as png
import imageio.v2 as imageio

def extract_leaves_and_save(
    hsi_np,
    leaf_mask,
    base_name,
    save_hsi_dir,
    save_mask_dir,
    save_preview_dir,
    wavelengths,  # 1D array-like of wavelengths
    band_to_show=50,
    min_pixels=20,
    padding=10,
    show=False
):
    """
    Split a hyperspectral image into one crop per connected leaf component and
    save each crop to disk.

    Pixels where ``leaf_mask == 1`` are grouped into connected components with
    ``scipy.ndimage.label``. Every component with at least ``min_pixels`` pixels
    is cropped to its bounding box, grown by ``padding`` pixels and clipped to
    the image. Pixels outside the component are set to zero. Three files named
    ``f"{base_name}_{k}"`` are written, with ``k`` counting saved leaves from 0:

    - Cropped HSI (.h5, dataset name: 'Hypercube', with 'Wavelengths' attribute)
    - Cropped mask (.png, 0/255)
    - Cropped band preview (.png, band ``band_to_show`` rescaled to 0-255)

    Args:
        hsi_np: Array indexed as (bands, rows, cols).
        leaf_mask: 2D array-like with the same (rows, cols) shape as ``hsi_np``;
            leaf pixels have value 1.
        base_name: Prefix for the output file names.
        save_hsi_dir: Output folder for the .h5 crops (created if missing).
        save_mask_dir: Output folder for the mask PNGs (created if missing).
        save_preview_dir: Output folder for the preview PNGs (created if missing).
        wavelengths: 1D array-like stored as float32 in the 'Wavelengths' attribute.
        band_to_show: Index of the band used for the preview image.
        min_pixels: Components with fewer pixels than this are skipped.
        padding: Number of pixels added around each bounding box.
        show: If True, display every preview with matplotlib.

    Returns:
        int: Number of leaves saved.

    Raises:
        ValueError: If ``hsi_np`` is not 3D or ``leaf_mask`` does not match its
            spatial shape.
        IndexError: If ``band_to_show`` is not a valid band index of ``hsi_np``.
    """
    # Local import: the notebook only imports `label` from scipy.ndimage.
    from scipy.ndimage import find_objects

    # Validate everything up front so that bad input never leaves a partially
    # written set of output files (or freshly created empty folders) behind.
    n_bands, H, W = hsi_np.shape
    leaf_mask = np.asarray(leaf_mask)
    if leaf_mask.shape != (H, W):
        raise ValueError(
            f"leaf_mask shape {leaf_mask.shape} does not match the HSI spatial shape {(H, W)}"
        )
    if not -n_bands <= band_to_show < n_bands:
        raise IndexError(
            f"band_to_show={band_to_show} is out of range for a cube with {n_bands} bands"
        )
    # Converted once; also lets callers pass a plain list.
    wavelengths_f32 = np.asarray(wavelengths, dtype=np.float32)

    os.makedirs(save_hsi_dir, exist_ok=True)
    os.makedirs(save_mask_dir, exist_ok=True)
    os.makedirs(save_preview_dir, exist_ok=True)

    labeled_mask, num_leaves = label(leaf_mask == 1)
    print(f"Found {num_leaves} connected leaf components")

    # Sizes and bounding boxes of all components in a single pass each, instead
    # of one full-image comparison per component.
    leaf_sizes = np.bincount(labeled_mask.ravel(), minlength=num_leaves + 1)
    leaf_boxes = find_objects(labeled_mask)  # entry i belongs to label i + 1

    saved_count = 0

    for leaf_id in range(1, num_leaves + 1):
        if leaf_sizes[leaf_id] < min_pixels:
            continue

        # Bounding box as half-open slices, padded and clipped to the image.
        row_box, col_box = leaf_boxes[leaf_id - 1]
        y_start = max(0, row_box.start - padding)
        y_stop = min(H, row_box.stop + padding)
        x_start = max(0, col_box.start - padding)
        x_stop = min(W, col_box.stop + padding)

        cropped_hsi = hsi_np[:, y_start:y_stop, x_start:x_stop]
        # Only this component counts as leaf; neighbours inside the padded
        # window are masked out.
        cropped_mask = labeled_mask[y_start:y_stop, x_start:x_stop] == leaf_id
        masked_cropped_hsi = cropped_hsi * cropped_mask

        base_name_leaf = f"{base_name}_{saved_count}"

        # Save HSI as HDF5 with 'Hypercube' dataset and 'Wavelengths' attribute
        hsi_path = os.path.join(save_hsi_dir, base_name_leaf + '.h5')
        with h5py.File(hsi_path, 'w') as f:
            dset = f.create_dataset('Hypercube', data=masked_cropped_hsi.astype(np.float32))
            dset.attrs['Wavelengths'] = wavelengths_f32

        # Save mask as PNG
        mask_path = os.path.join(save_mask_dir, base_name_leaf + '.png')
        imageio.imwrite(mask_path, (cropped_mask.astype(np.uint8) * 255))

        # Save band preview as PNG (min-max scaled; the epsilon avoids a
        # division by zero for a constant band)
        preview_image = masked_cropped_hsi[band_to_show]
        preview_norm = ((preview_image - np.min(preview_image)) / (np.ptp(preview_image) + 1e-6) * 255).astype(np.uint8)
        preview_path = os.path.join(save_preview_dir, base_name_leaf + '.png')
        imageio.imwrite(preview_path, preview_norm)

        # Optional display
        if show:
            plt.imshow(preview_norm, cmap='gray')
            plt.title(f'Cropped Leaf {saved_count} - Band {band_to_show}')
            plt.axis('off')
            plt.show()

        saved_count += 1

    print(f"Saved {saved_count} leaves to:")
    print(f" - HSI files:   {save_hsi_dir}")
    print(f" - Mask files:  {save_mask_dir}")
    print(f" - Previews:    {save_preview_dir}")
    return saved_count

In [47]:
# NOTE(review): `leaf_mask` and `image_path` are not defined in the visible cells;
# they must already exist in the kernel for this cell to run — confirm, or load
# them here before the call.
extract_leaves_and_save(
    hsi_np=hsi_np,
    leaf_mask=leaf_mask,
    base_name = os.path.splitext(os.path.basename(image_path))[0],
    save_hsi_dir='/home/r0979317/Documents/Thesis_Strawberries/Data/Crop_HDF5',
    save_mask_dir='/home/r0979317/Documents/Thesis_Strawberries/Data/Crop_Mask',
    save_preview_dir='/home/r0979317/Documents/Thesis_Strawberries/Data/Crop_RGB',
    wavelengths=wlen,  # required argument; `wlen` was loaded together with `hsi_np`
    padding=10,
    min_pixels=1000,
    band_to_show=50,
    show=False
)

Found 24 connected leaf components
Saved 6 leaves to:
 - HSI files:   /home/r0979317/Documents/Thesis_Strawberries/Data/Crop_HDF5
 - Mask files:  /home/r0979317/Documents/Thesis_Strawberries/Data/Crop_Mask
 - Previews:    /home/r0979317/Documents/Thesis_Strawberries/Data/Crop_RGB


6

In [62]:
def save_cropped_leaves(
    image_paths,
    apply_mask = True,
    save_hsi_dir='/home/r0979317/Documents/Thesis_Strawberries/Data/cropped_hdf5',
    save_mask_dir='/home/r0979317/Documents/Thesis_Strawberries/Data/cropped_masks',
    save_preview_dir='/home/r0979317/Documents/Thesis_Strawberries/Data/Crop_RGB',
    padding=10,
    min_pixels=1000,
    band_to_show=50,
    show=False,
):
    """
    Run ``extract_leaves_and_save`` on every image in ``image_paths``.

    For each path the cube and its wavelengths are loaded with
    ``LoadHSI(..., return_wlens=True)``. Output files are prefixed with the
    image file name without its extension.

    Args:
        image_paths: Iterable of paths to the HSI files.
        apply_mask: If True, the leaf mask is read with ``read_mask`` from
            ``MASK_FOLDER/CAMERA/<image name>.png``. If False, an all-ones mask
            covering the whole image is used, so the full image is handled as a
            single component (previously this raised ``UnboundLocalError``).
        save_hsi_dir: Output folder for the cropped .h5 files.
        save_mask_dir: Output folder for the cropped mask PNGs.
        save_preview_dir: Output folder for the preview PNGs.
        padding: Forwarded to ``extract_leaves_and_save``.
        min_pixels: Forwarded to ``extract_leaves_and_save``.
        band_to_show: Forwarded to ``extract_leaves_and_save``.
        show: Forwarded to ``extract_leaves_and_save``.

    Returns:
        None
    """
    for image_path in image_paths:
        hsi_np, wlens = LoadHSI(image_path, return_wlens=True)

        # File name without its extension. splitext handles any extension,
        # whereas replacing the literal '.hdf5' left other names (e.g. '.h5')
        # unchanged, so the HSI file itself was then opened as the mask.
        base_name = os.path.splitext(os.path.basename(image_path))[0]

        if apply_mask:
            # Open mask
            mask_path = os.path.join(MASK_FOLDER, CAMERA, base_name + '.png')
            leaf_mask = read_mask(mask_path)
        else:
            # No mask requested: mark every pixel as leaf. The cube is indexed
            # as (bands, rows, cols) by extract_leaves_and_save.
            leaf_mask = np.ones(hsi_np.shape[1:], dtype=np.uint8)

        extract_leaves_and_save(
            hsi_np=hsi_np,
            leaf_mask=leaf_mask,
            base_name=base_name,
            save_hsi_dir=save_hsi_dir,
            save_mask_dir=save_mask_dir,
            save_preview_dir=save_preview_dir,
            wavelengths=wlens,
            padding=padding,
            min_pixels=min_pixels,
            band_to_show=band_to_show,
            show=show
        )
        

In [65]:
# Process one batch of the selected images (indices 77 to 106).
# NOTE(review): the file count printed earlier is 108, so this slice stops before
# the last path (index 107) — confirm that leaving it out is intended.
save_cropped_leaves(image_paths[77:107])

Found 8 connected leaf components
Saved 5 leaves to:
 - HSI files:   /home/r0979317/Documents/Thesis_Strawberries/Data/cropped_hdf5
 - Mask files:  /home/r0979317/Documents/Thesis_Strawberries/Data/cropped_masks
 - Previews:    /home/r0979317/Documents/Thesis_Strawberries/Data/Crop_RGB
Found 8 connected leaf components
Saved 4 leaves to:
 - HSI files:   /home/r0979317/Documents/Thesis_Strawberries/Data/cropped_hdf5
 - Mask files:  /home/r0979317/Documents/Thesis_Strawberries/Data/cropped_masks
 - Previews:    /home/r0979317/Documents/Thesis_Strawberries/Data/Crop_RGB
Found 4 connected leaf components
Saved 4 leaves to:
 - HSI files:   /home/r0979317/Documents/Thesis_Strawberries/Data/cropped_hdf5
 - Mask files:  /home/r0979317/Documents/Thesis_Strawberries/Data/cropped_masks
 - Previews:    /home/r0979317/Documents/Thesis_Strawberries/Data/Crop_RGB
Found 7 connected leaf components
Saved 4 leaves to:
 - HSI files:   /home/r0979317/Documents/Thesis_Strawberries/Data/cropped_hdf5
 - Mask