In [None]:
import os
import shutil

import numpy as np
from PIL import Image
from tqdm import tqdm

## Preprocessing RedEdge

In [None]:
def merge_channels_from_all_subsets(subsets, src_dir, dest_dir, channels):
    """Gather the channel folders of several subsets into one folder per channel.

    For each ``subset`` in ``subsets``, every entry of ``src_dir/<subset>``
    whose name appears in ``channels`` is treated as a channel folder. Each
    file inside it is copied to ``dest_dir/<channel>/<subset>_<file name>``;
    the subset prefix keeps files from different subsets from overwriting
    each other.

    Args:
        subsets: Iterable of subset folder names located under ``src_dir``.
        src_dir: Directory that contains the subset folders.
        dest_dir: Directory that receives one sub-folder per channel. It is
            created (including missing parents) when needed.
        channels: Collection of channel folder names to copy; other entries
            of a subset folder are ignored.

    Raises:
        OSError: If a subset or channel folder cannot be listed, a
            destination folder cannot be created, or a copy fails.
    """
    for subset in tqdm(subsets):
        subset_src_dir = os.path.join(src_dir, subset)
        for channel in os.listdir(subset_src_dir):
            if channel not in channels:
                continue

            channel_dest_dir = os.path.join(dest_dir, channel)
            # exist_ok avoids swallowing unrelated errors (permissions, bad
            # path) the way a bare ``except: pass`` around mkdir would.
            os.makedirs(channel_dest_dir, exist_ok=True)

            # The channel folder lives inside the subset folder, not directly
            # under src_dir.
            channel_src_dir = os.path.join(subset_src_dir, channel)
            images = os.listdir(channel_src_dir)
            print(f"Files to copy {len(images)}")

            for img in images:
                img_src_path = os.path.join(channel_src_dir, img)
                img_dest_path = os.path.join(channel_dest_dir, f"{subset}_{img}")
                shutil.copy(img_src_path, img_dest_path)

### For Training set

In [None]:
def rededge_merge_channels_and_save(r_dir, g_dir, b_dir, nir_dir, re_dir, output_dir):
    """Stack the R, G, B, NIR and RE image of every tile into one ``.npy`` file.

    The file names are taken from ``r_dir``; the other four directories are
    expected to contain a file with the same name for each of them. The five
    images are converted to numpy arrays and stacked along a new last axis in
    the order R, G, B, NIR, RE, then saved to
    ``output_dir/<file name without extension>.npy``.

    NOTE(review): ``Image`` is assumed to be ``PIL.Image`` - confirm it is
    imported in the notebook.

    Args:
        r_dir: Directory with the R images; also defines which files are processed.
        g_dir: Directory with the G images.
        b_dir: Directory with the B images.
        nir_dir: Directory with the NIR images.
        re_dir: Directory with the RE images.
        output_dir: Directory for the ``.npy`` files; created when missing.

    Raises:
        FileNotFoundError: If a file from ``r_dir`` is missing in another
            channel directory.
        ValueError: If the channel arrays of one tile differ in shape
            (raised by ``np.stack``).
    """
    os.makedirs(output_dir, exist_ok=True)

    # Order here is the channel order of the saved array.
    channel_dirs = (r_dir, g_dir, b_dir, nir_dir, re_dir)

    # Sorted so repeated runs process tiles in the same order.
    for file_name in tqdm(sorted(os.listdir(r_dir))):
        # Skip sub-directories (e.g. notebook checkpoint folders).
        if not os.path.isfile(os.path.join(r_dir, file_name)):
            continue

        channel_arrays = []
        for channel_dir in channel_dirs:
            # The context manager releases the file handle right after the
            # pixel data has been copied into the array.
            with Image.open(os.path.join(channel_dir, file_name)) as channel_image:
                channel_arrays.append(np.array(channel_image))

        merged_array = np.stack(channel_arrays, axis=-1)

        output_path = os.path.join(output_dir, os.path.splitext(file_name)[0] + '.npy')
        np.save(output_path, merged_array)

1. Merging 'R', 'G', 'B', 'NIR', 'RE' channels from each training subset '000', '001', '002', '004'

In [None]:
# RedEdge training subsets and the channels collected from each of them.
subsets = ['000', '001', '002', '004']
channels = ['R', 'G', 'B', 'NIR', 'RE']

src_dir = "/2018-weedMap-dataset-release/2018-weedMap-dataset-release/Tiles/RedEdge/"
# Build the destination from src_dir so it is the same absolute location the
# channel-stacking cell reads from; a relative path here would depend on the
# current working directory.
dest_dir = os.path.join(src_dir, "Trainset-Multi")

merge_channels_from_all_subsets(subsets, src_dir, dest_dir, channels)
    

2. Merging All Channels

In [None]:
# Per-channel input folders under Trainset-Multi and the folder for the stacked arrays.
rededge_trainset_dir = '/2018-weedMap-dataset-release/2018-weedMap-dataset-release/Tiles/RedEdge/Trainset-Multi'
r_directory = f'{rededge_trainset_dir}/R'
g_directory = f'{rededge_trainset_dir}/G'
b_directory = f'{rededge_trainset_dir}/B'
nir_directory = f'{rededge_trainset_dir}/NIR'
re_directory = f'{rededge_trainset_dir}/RE'
output_directory = f'{rededge_trainset_dir}/RGBNIRRE'

# Stack the five channels of every tile and write one .npy file per tile.
rededge_merge_channels_and_save(
    r_directory,
    g_directory,
    b_directory,
    nir_directory,
    re_directory,
    output_directory,
)

### For Testset

1. Merging 'R', 'G', 'B', 'NIR', 'RE' channels from the test subset '003'

In [None]:
# RedEdge test subset and the channels collected from it.
subsets = ['003']
channels = ['R', 'G', 'B', 'NIR', 'RE']

src_dir = "/2018-weedMap-dataset-release/2018-weedMap-dataset-release/Tiles/RedEdge/"
# Build the destination from src_dir so it is the same absolute location the
# channel-stacking cell reads from; a relative path here would depend on the
# current working directory.
dest_dir = os.path.join(src_dir, "Testset-Multi")

merge_channels_from_all_subsets(subsets, src_dir, dest_dir, channels)

2. Merging All channels

In [None]:
# Per-channel input folders under Testset-Multi and the folder for the stacked arrays.
rededge_testset_dir = '/2018-weedMap-dataset-release/2018-weedMap-dataset-release/Tiles/RedEdge/Testset-Multi'
r_directory = f'{rededge_testset_dir}/R'
g_directory = f'{rededge_testset_dir}/G'
b_directory = f'{rededge_testset_dir}/B'
nir_directory = f'{rededge_testset_dir}/NIR'
re_directory = f'{rededge_testset_dir}/RE'
output_directory = f'{rededge_testset_dir}/RGBNIRRE'

# Stack the five channels of every tile and write one .npy file per tile.
rededge_merge_channels_and_save(
    r_directory,
    g_directory,
    b_directory,
    nir_directory,
    re_directory,
    output_directory,
)

## Preprocessing Sequoia

In [None]:
def sequoia_merge_channels_and_save(r_dir, g_dir, nir_dir, re_dir, output_dir):
    """Stack the R, G, NIR and RE image of every tile into one ``.npy`` file.

    The file names are taken from ``r_dir``; the other three directories are
    expected to contain a file with the same name for each of them. The four
    images are converted to numpy arrays and stacked along a new last axis in
    the order R, G, NIR, RE, then saved to
    ``output_dir/<file name without extension>.npy``.

    NOTE(review): ``Image`` is assumed to be ``PIL.Image`` - confirm it is
    imported in the notebook.

    Args:
        r_dir: Directory with the R images; also defines which files are processed.
        g_dir: Directory with the G images.
        nir_dir: Directory with the NIR images.
        re_dir: Directory with the RE images.
        output_dir: Directory for the ``.npy`` files; created when missing.

    Raises:
        FileNotFoundError: If a file from ``r_dir`` is missing in another
            channel directory.
        ValueError: If the channel arrays of one tile differ in shape
            (raised by ``np.stack``).
    """
    os.makedirs(output_dir, exist_ok=True)

    # Order here is the channel order of the saved array.
    channel_dirs = (r_dir, g_dir, nir_dir, re_dir)

    # Sorted so repeated runs process tiles in the same order.
    for file_name in tqdm(sorted(os.listdir(r_dir))):
        # Skip sub-directories (e.g. notebook checkpoint folders).
        if not os.path.isfile(os.path.join(r_dir, file_name)):
            continue

        channel_arrays = []
        for channel_dir in channel_dirs:
            # The context manager releases the file handle right after the
            # pixel data has been copied into the array.
            with Image.open(os.path.join(channel_dir, file_name)) as channel_image:
                channel_arrays.append(np.array(channel_image))

        merged_array = np.stack(channel_arrays, axis=-1)

        output_path = os.path.join(output_dir, os.path.splitext(file_name)[0] + '.npy')
        np.save(output_path, merged_array)

### For Trainset

1. Merging 'R', 'G', 'NIR', 'RE' channels from each training subset '006', '007'

In [None]:

# Sequoia training subsets and the channels collected from each of them.
subsets = ['006', '007']
channels = ['R', 'G', 'NIR', 'RE']

src_dir = "/2018-weedMap-dataset-release/2018-weedMap-dataset-release/Tiles/Sequoia/"
# Build the destination from src_dir so it is the same absolute location the
# channel-stacking cell reads from; a relative path here would depend on the
# current working directory.
dest_dir = os.path.join(src_dir, "Trainset-Multi")

merge_channels_from_all_subsets(subsets, src_dir, dest_dir, channels)

2. Merging All Channels

In [None]:
# Per-channel input folders under Trainset-Multi and the folder for the stacked arrays.
sequoia_trainset_dir = '/2018-weedMap-dataset-release/2018-weedMap-dataset-release/Tiles/Sequoia/Trainset-Multi'
r_directory = f'{sequoia_trainset_dir}/R'
g_directory = f'{sequoia_trainset_dir}/G'
nir_directory = f'{sequoia_trainset_dir}/NIR'
re_directory = f'{sequoia_trainset_dir}/RE'
output_directory = f'{sequoia_trainset_dir}/RGBNIRRE'

# Stack the four channels of every tile and write one .npy file per tile.
sequoia_merge_channels_and_save(
    r_directory,
    g_directory,
    nir_directory,
    re_directory,
    output_directory,
)

### For Testset

1. Merging 'R', 'G', 'NIR', 'RE' channels from the test subset '005'

In [None]:

# Sequoia test subset and the channels collected from it.
subsets = ['005']
channels = ['R', 'G', 'NIR', 'RE']

src_dir = "/2018-weedMap-dataset-release/2018-weedMap-dataset-release/Tiles/Sequoia/"
# Build the destination from src_dir so it is the same absolute location the
# channel-stacking cell reads from; a relative path here would depend on the
# current working directory.
dest_dir = os.path.join(src_dir, "Testset-Multi")

merge_channels_from_all_subsets(subsets, src_dir, dest_dir, channels)

2. Merging all channels

In [None]:
# Per-channel input folders under Testset-Multi and the folder for the stacked arrays.
sequoia_testset_dir = '/2018-weedMap-dataset-release/2018-weedMap-dataset-release/Tiles/Sequoia/Testset-Multi'
r_directory = f'{sequoia_testset_dir}/R'
g_directory = f'{sequoia_testset_dir}/G'
nir_directory = f'{sequoia_testset_dir}/NIR'
re_directory = f'{sequoia_testset_dir}/RE'
output_directory = f'{sequoia_testset_dir}/RGBNIRRE'

# Stack the four channels of every tile and write one .npy file per tile.
sequoia_merge_channels_and_save(
    r_directory,
    g_directory,
    nir_directory,
    re_directory,
    output_directory,
)