### 2-7-TrainValSplit.ipynb

This notebook lists the manually verified live-bee wing images, copies their segmentation masks into a filtered folder, and then splits the wing images and the filtered masks into training (90%) and validation (10%) sets.

In [1]:
import os
import cv2
import json
import shutil
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

from tqdm import tqdm
from glob import glob
from sklearn.model_selection import train_test_split

In [17]:
# Directory layout of the segmented live-bee wing data
data_dir = Path("/mnt/c/Projects/Master/Data/")
seg_dir = data_dir.joinpath("Processed", "LiveBees", "7-LiveWingsSegmented")

# Files are read from input_dir; the manually selected subset is written to output_dir
input_dir = seg_dir.joinpath("3-combined-masks")
output_dir = seg_dir.joinpath("6-filtered-masks")

# Debug switch (not referenced by the cells visible in this notebook)
DEBUG = False

In [7]:
# List of manually selected images verified to have no issues.
# Each entry is a bare file name of the form "<Round>-<Hive>-<date>-<bee id>.png".
# The names are used verbatim as path components when the files are copied, so
# they must match the names on disk exactly (including case).
# NOTE(review): a few entries look irregular (e.g. "h003bee13", or "h02b01" listed
# under hive12). The recorded copy run in this notebook reports 286/286 files and
# shows no "not found" warning, so these presumably mirror the real file names --
# confirm against the input folder before "correcting" them.
filtered_images = [
    "Round01-Hive01-2024_06_05-h01bee23.png",
    "Round01-Hive01-2024_06_05-h01bee25.png",
    "Round01-Hive01-2024_06_05-h01bee36.png",
    "Round01-Hive01-2024_06_05-h01bee46.png",
    "Round01-Hive01-2024_06_11-h01b02.png",
    "Round01-Hive01-2024_06_11-h01b05.png",
    "Round01-Hive01-2024_06_11-h01b19.png",
    "Round01-Hive01-2024_06_11-h01b23.png",
    "Round01-Hive01-2024_06_11-h01b28.png",
    "Round01-Hive01-2024_06_11-h01b42.png",
    "Round01-Hive01-2024_06_19-h01b03.png",
    "Round01-Hive01-2024_06_19-h01b04.png",
    "Round01-Hive01-2024_06_19-h01b12.png",
    "Round01-Hive01-2024_06_19-h01b13.png",
    "Round01-Hive01-2024_06_19-h01b21.png",
    "Round01-Hive01-2024_06_19-h01b35.png",
    "Round01-Hive01-2024_06_19-h01b36.png",
    "Round01-Hive01-2024_06_19-h01b37.png",
    "Round01-Hive01-2024_06_19-h01b41.png",
    "Round01-Hive01-2024_06_19-h01b42.png",
    "Round01-Hive01-2024_06_19-h01b46.png",
    "Round01-Hive01-2024_06_19-h01b47.png",
    "Round01-Hive01-2024_06_27-h01b07.png",
    "Round01-Hive01-2024_06_27-h01b10.png",
    "Round01-Hive01-2024_06_27-h01b15.png",
    "Round01-Hive01-2024_06_27-h01b17.png",
    "Round01-Hive01-2024_06_27-h01b26.png",
    "Round01-Hive01-2024_06_27-h01b30.png",
    "Round01-Hive01-2024_06_27-h01b36.png",
    "Round01-Hive01-2024_06_27-h01b46.png",
    "Round01-Hive02-2024_06_07-h02bee01.png",
    "Round01-Hive02-2024_06_07-h02bee19.png",
    "Round01-Hive02-2024_06_07-h02bee41.png",
    "Round01-Hive02-2024_06_07-h02bee47.png",
    "Round01-Hive02-2024_06_07-h02bee50.png",
    "Round01-Hive02-2024_06_07-h02bee57.png",
    "Round01-Hive02-2024_06_07-h02bee62.png",
    "Round01-Hive02-2024_06_10-h02b01.png",
    "Round01-Hive02-2024_06_10-h02b10.png",
    "Round01-Hive02-2024_06_10-h02b12.png",
    "Round01-Hive02-2024_06_10-h02b15.png",
    "Round01-Hive02-2024_06_10-h02b36.png",
    "Round01-Hive02-2024_06_10-h02b47.png",
    "Round01-Hive02-2024_06_10-h02b67.png",
    "Round01-Hive02-2024_06_18-h02b01.png",
    "Round01-Hive02-2024_06_18-h02b16.png",
    "Round01-Hive02-2024_06_18-h02b27.png",
    "Round01-Hive02-2024_06_18-h02b31.png",
    "Round01-Hive02-2024_06_18-h02b34.png",
    "Round01-Hive02-2024_06_18-h02b43.png",
    "Round01-Hive02-2024_06_18-h02b44.png",
    "Round01-Hive02-2024_06_18-h02b45.png",
    "Round01-Hive02-2024_06_18-h02b46.png",
    "Round01-Hive02-2024_06_18-h02b47.png",
    "Round01-Hive02-2024_06_18-h02b50.png",
    "Round01-Hive02-2024_06_18-h02b51.png",
    "Round01-Hive02-2024_06_18-h02b57.png",
    "Round01-Hive02-2024_06_18-h02b62.png",
    "Round01-Hive02-2024_06_24-h02b01.png",
    "Round01-Hive02-2024_06_24-h02b10.png",
    "Round01-Hive02-2024_06_24-h02b12.png",
    "Round01-Hive02-2024_06_24-h02b14.png",
    "Round01-Hive02-2024_06_24-h02b16.png",
    "Round01-Hive02-2024_06_24-h02b17.png",
    "Round01-Hive02-2024_06_24-h02b29.png",
    "Round01-Hive02-2024_06_24-h02b31.png",
    "Round01-Hive02-2024_06_24-h02b42.png",
    "Round01-Hive02-2024_06_24-h02b47.png",
    "Round01-Hive02-2024_06_24-h02b48.png",
    "Round01-Hive02-2024_06_24-h02b57.png",
    "Round01-Hive02-2024_06_24-h02b62.png",
    "Round01-Hive02-2024_06_24-h02b67.png",
    "Round01-Hive02-2024_06_24-h02b71.png",
    "Round01-Hive03-2024_06_06-h003bee13.png",
    "Round01-Hive03-2024_06_06-h03bee15.png",
    "Round01-Hive03-2024_06_06-h03bee16.png",
    "Round01-Hive03-2024_06_06-h03bee20.png",
    "Round01-Hive03-2024_06_06-h03bee21.png",
    "Round01-Hive03-2024_06_06-h03bee25.png",
    "Round01-Hive03-2024_06_06-h03bee29.png",
    "Round01-Hive03-2024_06_06-h03bee35.png",
    "Round01-Hive03-2024_06_06-h03bee39.png",
    "Round01-Hive03-2024_06_06-h03bee52.png",
    "Round01-Hive03-2024_06_06-h03bee82.png",
    "Round01-Hive03-2024_06_14-h03b04.png",
    "Round01-Hive03-2024_06_14-h03b05.png",
    "Round01-Hive03-2024_06_14-h03b12.png",
    "Round01-Hive03-2024_06_14-h03b15.png",
    "Round01-Hive03-2024_06_14-h03b16.png",
    "Round01-Hive03-2024_06_14-h03b18.png",
    "Round01-Hive03-2024_06_14-h03b21.png",
    "Round01-Hive03-2024_06_14-h03b23.png",
    "Round01-Hive03-2024_06_14-h03b25.png",
    "Round01-Hive03-2024_06_14-h03b28.png",
    "Round01-Hive03-2024_06_14-h03b32.png",
    "Round01-Hive03-2024_06_14-h03b42.png",
    "Round01-Hive03-2024_06_14-h03b47.png",
    "Round01-Hive03-2024_06_14-h03b61.png",
    "Round01-Hive03-2024_06_20-h03b01.png",
    "Round01-Hive03-2024_06_20-h03b11.png",
    "Round01-Hive03-2024_06_20-h03b16.png",
    "Round01-Hive03-2024_06_20-h03b17.png",
    "Round01-Hive03-2024_06_20-h03b25.png",
    "Round01-Hive03-2024_06_20-h03b34.png",
    "Round01-Hive03-2024_06_20-h03b39.png",
    "Round01-Hive03-2024_06_20-h03b41.png",
    "Round01-Hive03-2024_06_25-h03b01.png",
    "Round01-Hive03-2024_06_25-h03b16.png",
    "Round01-Hive03-2024_06_25-h03b20.png",
    "Round01-Hive03-2024_06_25-h03b21.png",
    "Round01-Hive03-2024_06_25-h03b27.png",
    "Round01-Hive03-2024_06_25-h03b30.png",
    "Round01-Hive03-2024_06_25-h03b34.png",
    "Round01-Hive03-2024_06_25-h03b39.png",
    "Round01-Hive03-2024_06_25-h03b41.png",
    "Round01-Hive03-2024_06_25-h03b52.png",
    "Round01-Hive03-2024_06_25-h03b77.png",
    "Round01-Hive04-2024_06_07-h04bee02.png",
    "Round01-Hive04-2024_06_07-h04bee03.png",
    "Round01-Hive04-2024_06_07-h04bee14.png",
    "Round01-Hive04-2024_06_07-h04bee16.png",
    "Round01-Hive04-2024_06_07-h04bee22.png",
    "Round01-Hive04-2024_06_07-h04bee29.png",
    "Round01-Hive04-2024_06_07-h04bee41.png",
    "Round01-Hive04-2024_06_12-h04b05.png",
    "Round01-Hive04-2024_06_12-h04b21.png",
    "Round01-Hive04-2024_06_12-h04b24.png",
    "Round01-Hive04-2024_06_12-h04b31.png",
    "Round01-Hive04-2024_06_12-h04b32.png",
    "Round01-Hive04-2024_06_12-h04b35.png",
    "Round01-Hive04-2024_06_12-h04b37.png",
    "Round01-Hive04-2024_06_12-h04b44.png",
    "Round01-Hive04-2024_06_12-h04b51.png",
    "Round01-Hive04-2024_06_12-h04b55.png",
    "Round01-Hive04-2024_06_17-h04b01.png",
    "Round01-Hive04-2024_06_17-h04b07.png",
    "Round01-Hive04-2024_06_17-h04b16.png",
    "Round01-Hive04-2024_06_17-h04b17.png",
    "Round01-Hive04-2024_06_17-h04b19.png",
    "Round01-Hive04-2024_06_17-h04b32.png",
    "Round01-Hive04-2024_06_17-h04b36.png",
    "Round01-Hive04-2024_06_17-h04b41.png",
    "Round01-Hive04-2024_06_26-h04b14.png",
    "Round01-Hive04-2024_06_26-h04b17.png",
    "Round01-Hive04-2024_06_26-h04b37.png",
    "Round01-Hive04-2024_06_26-h04b41.png",
    "Round01-Hive04-2024_06_26-h04b42.png",
    "Round01-Hive04-2024_06_26-h04b48.png",
    "Round01-Hive04-2024_06_26-h04b50.png",
    "Round01-Hive05-2024_06_06-h05bee05.png",
    "Round01-Hive05-2024_06_06-h05bee50.png",
    "Round01-Hive05-2024_06_06-h05bee62.png",
    "Round01-Hive05-2024_06_13-h05b07.png",
    "Round01-Hive05-2024_06_13-h05b10.png",
    "Round01-Hive05-2024_06_13-h05b20.png",
    "Round01-Hive05-2024_06_13-h05b26.png",
    "Round01-Hive05-2024_06_13-h05b31.png",
    "Round01-Hive05-2024_06_13-h05b39.png",
    "Round01-Hive05-2024_06_13-h05b42.png",
    "Round01-Hive05-2024_06_13-h05b45.png",
    "Round01-Hive05-2024_06_13-h05b51.png",
    "Round01-Hive05-2024_06_13-h05b54.png",
    "Round01-Hive05-2024_06_13-h05b74.png",
    "Round01-Hive05-2024_06_13-h05b79.png",
    "Round01-Hive05-2024_06_21-h05b10.png",
    "Round01-Hive05-2024_06_21-h05b13.png",
    "Round01-Hive05-2024_06_21-h05b14.png",
    "Round01-Hive05-2024_06_21-h05b15.png",
    "Round01-Hive05-2024_06_21-h05b20.png",
    "Round01-Hive05-2024_06_21-h05b32.png",
    "Round01-Hive05-2024_06_21-h05b36.png",
    "Round01-Hive05-2024_06_21-h05b39.png",
    "Round01-Hive05-2024_06_21-h05b42.png",
    "Round01-Hive05-2024_06_21-h05b48.png",
    "Round01-Hive05-2024_06_21-h05b57.png",
    "Round01-Hive05-2024_06_21-h05b62.png",
    "Round01-Hive05-2024_06_21-h05b64.png",
    "Round01-Hive05-2024_06_21-h05b71.png",
    "Round01-Hive05-2024_06_21-h05b76.png",
    "Round01-Hive05-2024_06_21-h05b82.png",
    "Round01-Hive05-2024_06_28-h05b39.png",
    "Round01-Hive05-2024_06_28-h05b42.png",
    "Round01-Hive05-2024_06_28-h05b62.png",
    "Round01-Hive05-2024_06_28-h05b67.png",
    "Round01-Hive05-2024_06_28-h05b90.png",
    "Round02-hive11-2024_06_18-h11b01.png",
    "Round02-hive11-2024_06_18-h11b04.png",
    "Round02-hive11-2024_06_18-h11b11.png",
    "Round02-hive11-2024_06_18-h11b13.png",
    "Round02-hive11-2024_06_26-h11b01.png",
    "Round02-hive11-2024_06_26-h11b02.png",
    "Round02-hive11-2024_06_26-h11b07.png",
    "Round02-hive11-2024_06_26-h11b11.png",
    "Round02-hive11-2024_06_26-h11b12.png",
    "Round02-hive11-2024_06_26-h11b13.png",
    "Round02-hive11-2024_06_26-h11b14.png",
    "Round02-hive11-2024_06_26-h11b15.png",
    "Round02-hive11-2024_06_26-h11b16.png",
    "Round02-hive11-2024_06_26-h11b18.png",
    "Round02-hive11-2024_06_26-h11b20.png",
    "Round02-hive11-2024_06_26-h11b22.png",
    "Round02-hive11-2024_06_26-h11b26.png",
    "Round02-hive11-2024_06_26-h11b27.png",
    "Round02-hive11-2024_06_26-h11b28.png",
    "Round02-hive11-2024_07_01-h11b03.png",
    "Round02-hive11-2024_07_01-h11b10.png",
    "Round02-hive11-2024_07_01-h11b11.png",
    "Round02-hive11-2024_07_01-h11b12.png",
    "Round02-hive11-2024_07_01-h11b13.png",
    "Round02-hive11-2024_07_01-h11b14.png",
    "Round02-hive11-2024_07_01-h11b15.png",
    "Round02-hive11-2024_07_01-h11b17.png",
    "Round02-hive11-2024_07_01-h11b19.png",
    "Round02-hive11-2024_07_01-h11b20.png",
    "Round02-hive11-2024_07_01-h11b25.png",
    "Round02-hive11-2024_07_01-h11b28.png",
    "Round02-hive11-2024_07_11-h11b16.png",
    "Round02-hive11-2024_07_11-h11b17.png",
    "Round02-hive11-2024_07_11-h11b20.png",
    "Round02-hive11-2024_07_11-h11b25.png",
    "Round02-hive11-2024_07_11-h11b28.png",
    "Round02-hive12-2024_06_20-h12b03.png",
    "Round02-hive12-2024_06_20-h12b05.png",
    "Round02-hive12-2024_06_20-h12b10.png",
    "Round02-hive12-2024_06_20-h12b12.png",
    "Round02-hive12-2024_06_20-h12b16.png",
    "Round02-hive12-2024_06_20-h12b25.png",
    "Round02-hive12-2024_06_20-h12b26.png",
    "Round02-hive12-2024_06_20-h12b35.png",
    "Round02-hive12-2024_06_20-h12b38.png",
    "Round02-hive12-2024_06_20-h12b41.png",
    "Round02-hive12-2024_06_24-h02b01.png",
    "Round02-hive12-2024_06_24-h12b13.png",
    "Round02-hive12-2024_06_24-h12b16.png",
    "Round02-hive12-2024_06_24-h12b18.png",
    "Round02-hive12-2024_06_24-h12b19.png",
    "Round02-hive12-2024_06_24-h12b20.png",
    "Round02-hive12-2024_06_24-h12b23.png",
    "Round02-hive12-2024_06_24-h12b26.png",
    "Round02-hive12-2024_06_24-h12b27.png",
    "Round02-hive12-2024_06_24-h12b29.png",
    "Round02-hive12-2024_06_24-h12b35.png",
    "Round02-hive12-2024_06_24-h12b36.png",
    "Round02-hive12-2024_06_24-h12b37.png",
    "Round02-hive12-2024_06_24-h12b38.png",
    "Round02-hive12-2024_06_24-h12b40.png",
    "Round02-hive12-2024_07_05-h12b07.png",
    "Round02-hive12-2024_07_05-h12b10.png",
    "Round02-hive12-2024_07_05-h12b18.png",
    "Round02-hive12-2024_07_05-h12b20.png",
    "Round02-hive12-2024_07_05-h12b21.png",
    "Round02-hive12-2024_07_05-h12b23.png",
    "Round02-hive12-2024_07_05-h12b28.png",
    "Round02-hive12-2024_07_05-h12b29.png",
    "Round02-hive12-2024_07_05-h12b31.png",
    "Round02-hive12-2024_07_05-h12b35.png",
    "Round02-hive12-2024_07_05-h12b37.png",
    "Round02-hive12-2024_07_05-h12b40.png",
    "Round02-hive12-2024_07_05-h12b41.png",
    "Round02-hive12-2024_07_05-h12b43.png",
    "Round02-hive12-2024_07_08-h12b05.png",
    "Round02-hive12-2024_07_08-h12b07.png",
    "Round02-hive12-2024_07_08-h12b10.png",
    "Round02-hive12-2024_07_08-h12b12.png",
    "Round02-hive12-2024_07_08-h12b16.png",
    "Round02-hive12-2024_07_08-h12b18.png",
    "Round02-hive12-2024_07_08-h12b19.png",
    "Round02-hive12-2024_07_08-h12b20.png",
    "Round02-hive12-2024_07_08-h12b21.png",
    "Round02-hive12-2024_07_08-h12b25.png",
    "Round02-hive12-2024_07_08-h12b28.png",
    "Round02-hive12-2024_07_08-h12b31.png",
    "Round02-hive12-2024_07_08-h12b37.png",
    "Round02-hive12-2024_07_08-h12b40.png",
    "Round02-hive12-2024_07_08-h12b41.png",
    "Round02-hive12-2024_07_08-h12b43.png",
    "Round02-hive13-2024_06_19-h13b05.png",
    "Round02-hive13-2024_06_19-h13b07.png",
    "Round02-hive13-2024_06_19-h13b13.png",
    "Round02-hive13-2024_06_19-h13b15.png",
    "Round02-hive13-2024_06_19-h13b18.png",
    "Round02-hive13-2024_06_19-h13b26.png",
    "Round02-hive13-2024_06_19-h13b27.png",
    "Round02-hive13-2024_06_19-h13b37.png",
    "Round02-hive13-2024_06_28-h13b02.png",
    "Round02-hive13-2024_06_28-h13b03.png"
    ]

In [8]:
# Create output_dir if it doesn't exist
output_dir.mkdir(parents=True, exist_ok=True)

# Copy each selected file from input_dir to output_dir. Names that cannot be
# found are collected so they can be summarised once the loop has finished.
missing_files = []
for filename in tqdm(filtered_images, desc="Processing files", ncols=145):
    src = input_dir / filename
    dst = output_dir / filename
    if src.exists():
        shutil.copyfile(src, dst)
    else:
        missing_files.append(filename)
        # tqdm.write emits the message without breaking up the progress bar
        tqdm.write(f"Warning: {src} not found")

# Final tally so that skipped inputs are not lost between progress-bar updates
if missing_files:
    print(f"{len(missing_files)} of {len(filtered_images)} files were not found and were skipped")

Processing files: 100%|████████████████████████████████████████████████████████████████████████████████████████| 286/286 [00:03<00:00, 84.44it/s]


In [16]:
# Define directories for the train/validation split.
# seg_dir is defined in the directory cell near the top of the notebook (run that
# cell first). data_dir is intentionally not re-assigned here: none of the paths
# below are built from it directly.
input_img_dir = seg_dir / "0-LiveWings-png"
input_mask_dir = seg_dir / "6-filtered-masks"  # folder filled by the filtering step above
output_base_dir = seg_dir / "7-train-val"

In [15]:
# Split parameters
SPLIT_SEED = 42        # fixed seed so the split can be regenerated
TRAIN_FRACTION = 0.9   # share of the files assigned to the training set

# Create list of mask filenames. glob() returns entries in arbitrary order, so the
# names are sorted: with a fixed seed the split then depends only on the set of
# file names, not on the order in which the filesystem happens to list them.
fns = sorted(Path(fp).name for fp in glob(str(input_mask_dir / "*")))

# Every mask needs an image with the same name. Check this up front instead of
# failing halfway through the copy loop and leaving a partially written split.
missing_imgs = [fn for fn in fns if not (input_img_dir / fn).is_file()]
if missing_imgs:
    raise FileNotFoundError(
        f"{len(missing_imgs)} mask(s) have no matching image in {input_img_dir}, e.g. {missing_imgs[0]}"
    )

# Split into train and validation sets
train_fns, val_fns = train_test_split(fns, random_state=SPLIT_SEED, train_size=TRAIN_FRACTION)

# The two subsets must partition the file list without overlap
assert len(train_fns) + len(val_fns) == len(fns)
assert set(train_fns).isdisjoint(val_fns)

# Iterate over sets
for subset_name, file_list in (("train", train_fns), ("val", val_fns)):
    image_out_dir = output_base_dir / subset_name / "images"
    mask_out_dir = output_base_dir / subset_name / "masks"

    # Create output directories (only once)
    image_out_dir.mkdir(parents=True, exist_ok=True)
    mask_out_dir.mkdir(parents=True, exist_ok=True)

    # Files left over from an earlier run with a different split would end up in
    # both subsets; nothing is deleted here, but the situation is reported.
    expected_names = set(file_list)
    for out_dir in (image_out_dir, mask_out_dir):
        stale_names = {p.name for p in out_dir.iterdir()} - expected_names
        if stale_names:
            print(f"Warning: {out_dir} contains {len(stale_names)} file(s) that are not part of the current {subset_name} split")

    for fn in tqdm(file_list, desc="Processing files", ncols=145):
        src_img = input_img_dir / fn
        src_mask = input_mask_dir / fn

        dst_img = image_out_dir / fn
        dst_mask = mask_out_dir / fn

        shutil.copyfile(src_img, dst_img)
        shutil.copyfile(src_mask, dst_mask)

Processing files: 100%|████████████████████████████████████████████████████████████████████████████████████████| 257/257 [00:06<00:00, 41.61it/s]
Processing files: 100%|██████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 39.33it/s]
