In [2]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
from skimage import draw
from tqdm import tqdm
import matplotlib.colors as mcolors
#from shapely.geometry import Polygon
#import rasterio
#from rasterio.features import rasterize
#from shapely.errors import TopologicalError


# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Root directory that contains every dataset's data and image sub-folders.
input_root = "PREVIOUS_DATA"
# Directory that receives the rendered overlay PNGs.
output_folder = "PREVIOUS_CHECK"

# Overlay appearance: fill colours for the blade and vein polygons, the alpha
# applied inside each polygon, and the resolution used when saving figures.
blade_color, vein_color = "orange", "magenta"
overlay_alpha = 0.7
save_dpi = 150

# Dataset prefix -> (data sub-folder, image sub-folder), both relative to
# input_root. The prefix is also used in progress messages and output names.
datasets = {
    "MSU": ("msu_data", "msu_images"),
    "ORIGINAL": ("original_data", "original_images"),
    "UCD": ("ucd_data", "ucd_images"),
}

# Create the output directory up front; a no-op when it already exists.
os.makedirs(output_folder, exist_ok=True)

def read_coords(filepath):
    """Load a whitespace-delimited coordinate table as a 2-D float array.

    Args:
        filepath: Anything ``np.loadtxt`` accepts as its first argument
            (typically the path of a coordinates text file).

    Returns:
        A 2-D ``numpy.ndarray`` with one row per line of input. Callers in
        this file index it as ``coords[:, 0]`` and ``coords[:, 1]``.
    """
    # ndmin=2 keeps a single-row file as shape (1, n_cols). Without it,
    # loadtxt squeezes the result to 1-D and column indexing downstream
    # raises IndexError.
    return np.loadtxt(filepath, delimiter=None, ndmin=2)

def rasterize_polygon(coords, shape):
    """Return the pixel indices covered by a polygon.

    Args:
        coords: 2-D array of vertices; column 0 is passed to
            ``skimage.draw.polygon`` as the column coordinates and column 1
            as the row coordinates.
        shape: Image shape forwarded to ``draw.polygon`` as ``shape``.

    Returns:
        A ``(rows, cols)`` pair of index arrays as produced by
        ``draw.polygon``.
    """
    vertex_cols = coords[:, 0]
    vertex_rows = coords[:, 1]
    rows, cols = draw.polygon(vertex_rows, vertex_cols, shape=shape)
    return rows, cols

def resolve_image_path(image_folder, image_value):
    """Find the image file in ``image_folder`` that matches ``image_value``.

    Matching is case-insensitive. ``image_value`` is tried as-is when it has
    any dot-suffix. Unless that suffix is a recognised image extension, the
    name is additionally tried with ``.jpg`` and ``.jpeg`` appended. This
    matters for names that merely contain a dot, such as
    ``..._cultivar_V.-acertifolia_rep_1_raw_image``: ``os.path.splitext``
    reports a non-empty "extension" for them even though none is present.

    Args:
        image_folder: Directory to search (not recursive).
        image_value: Image file name, with or without an extension.

    Returns:
        ``os.path.join(image_folder, <actual file name>)`` for the first
        matching candidate, using the on-disk spelling of the file name.

    Raises:
        FileNotFoundError: If no candidate matches any file in the folder.
    """
    known_exts = (".jpg", ".jpeg", ".png", ".tif", ".tiff", ".bmp", ".gif")
    fallback_exts = (".jpg", ".jpeg")

    ext = os.path.splitext(image_value)[1]
    candidates = []
    if ext:
        # Keep the literal name first so existing behaviour is preserved.
        candidates.append(image_value)
    if ext.lower() not in known_exts:
        # No extension, or a dot inside the name mistaken for one.
        candidates.extend(f"{image_value}{suffix}" for suffix in fallback_exts)

    # Lowercase name -> on-disk name. setdefault keeps the first entry in
    # listing order, matching the original first-match semantics.
    files_by_lower = {}
    for file in os.listdir(image_folder):
        files_by_lower.setdefault(file.lower(), file)

    for candidate in candidates:
        match = files_by_lower.get(candidate.lower())
        if match is not None:
            return os.path.join(image_folder, match)

    raise FileNotFoundError(f"No image file found for base name: {image_value} in {image_folder}")


# Process all datasets
def _overlay_polygon(ax, coords_path, color, img_shape, alpha):
    """Draw a filled, semi-transparent polygon on ``ax`` from a coords file.

    Does nothing when ``coords_path`` does not exist. Otherwise the polygon
    is rasterized to ``img_shape`` and shown as an RGBA layer that is fully
    transparent outside the polygon and has opacity ``alpha`` inside it.
    """
    if not os.path.exists(coords_path):
        return
    coords = read_coords(coords_path)
    rr, cc = rasterize_polygon(coords, img_shape)

    # The alpha channel starts at 0 everywhere; only polygon pixels are raised.
    overlay = np.zeros((*img_shape, 4), dtype=float)
    overlay[..., :3] = mcolors.to_rgb(color)
    overlay[rr, cc, 3] = alpha
    ax.imshow(overlay)


# Process all datasets: for every "<base>_info.csv", render the referenced
# image with blade and vein overlays and save it to output_folder.
info_suffix = "_info.csv"

for prefix, (data_folder, image_folder) in datasets.items():
    data_path = os.path.join(input_root, data_folder)
    image_path = os.path.join(input_root, image_folder)

    info_files = [f for f in os.listdir(data_path) if f.endswith(info_suffix)]

    for info_file in tqdm(info_files, desc=f"Processing {prefix} dataset"):
        # Strip only the trailing suffix; str.replace would also remove any
        # earlier occurrence inside the name.
        base_name = info_file[: -len(info_suffix)]
        info_path = os.path.join(data_path, info_file)

        # The UCD dataset names its blade files "_blades.txt"; the others
        # use "_blade.txt".
        blade_suffix = "_blades.txt" if prefix == "UCD" else "_blade.txt"
        blade_path = os.path.join(data_path, base_name + blade_suffix)
        veins_path = os.path.join(data_path, base_name + "_veins.txt")

        # Load info CSV and look up the image name (factor == "image").
        df_info = pd.read_csv(info_path)
        image_rows = df_info.loc[df_info["factor"] == "image", "value"]
        if image_rows.empty:
            # Previously an IndexError that aborted the whole run.
            print(f"❌ No 'image' entry found in {info_path}")
            continue
        # str() guards against pandas parsing the value as a non-string.
        image_value = str(image_rows.iloc[0])

        try:
            image_full_path = resolve_image_path(image_path, image_value)
        except FileNotFoundError as e:
            print(f"❌ {e}")
            continue

        try:
            # The context manager closes the underlying file handle promptly.
            with Image.open(image_full_path) as pil_image:
                image = np.array(pil_image.convert("RGB"))
        except Exception as e:
            # Best-effort: report the unreadable image and move on.
            print(f"❌ Could not open image {image_value}: {e}")
            continue

        fig, ax = plt.subplots()
        try:
            ax.imshow(image)
            ax.axis("off")
            img_shape = image.shape[:2]

            _overlay_polygon(ax, blade_path, blade_color, img_shape, overlay_alpha)
            _overlay_polygon(ax, veins_path, vein_color, img_shape, overlay_alpha)

            # Save output
            save_name = f"{prefix}_{base_name}.png"
            save_path = os.path.join(output_folder, save_name)
            fig.savefig(save_path, bbox_inches="tight", pad_inches=0.1, dpi=save_dpi)
        finally:
            # Always release this figure, even if an overlay or save failed.
            plt.close(fig)

print("✅ All datasets processed and saved to:", output_folder)




Processing MSU dataset:  20%|████                | 4/20 [00:20<01:25,  5.33s/it]

❌ No image file found for base name: user9_row_RSB_7_vine_4_cultivar_V.-acertifolia_rep_1_raw_image in PREVIOUS_DATA/msu_images
❌ No image file found for base name: user7_row_RSB_6_vine_4_cultivar_GRN-3_rep_2_raw_image.jpg in PREVIOUS_DATA/msu_images


Processing MSU dataset:  35%|███████             | 7/20 [00:24<00:36,  2.81s/it]

❌ No image file found for base name: user27_row_RSB_1_vine_4_cultivar_AxR1_rep_1_raw_image.jpg in PREVIOUS_DATA/msu_images


Processing MSU dataset:  50%|█████████▌         | 10/20 [00:37<00:40,  4.00s/it]

❌ No image file found for base name: user27_row_RSB_1_vine_4_cultivar_AxR1_rep_2_raw_image.jpg in PREVIOUS_DATA/msu_images


Processing MSU dataset:  60%|███████████▍       | 12/20 [00:42<00:27,  3.39s/it]

❌ No image file found for base name: user9_row_18_vine_1_cultivar_Cinsault_rep_2_raw_image.jpg in PREVIOUS_DATA/msu_images


Processing MSU dataset:  70%|█████████████▎     | 14/20 [00:50<00:21,  3.58s/it]

❌ No image file found for base name: user9_row_RSB_7_vine_4_cultivar_V.-acertifolia_rep_2_raw_image in PREVIOUS_DATA/msu_images


Processing MSU dataset:  80%|███████████████▏   | 16/20 [01:00<00:16,  4.18s/it]

❌ No image file found for base name: user7_row_RSB_6_vine_4_cultivar_GRN-3_rep_1_raw_image.jpg in PREVIOUS_DATA/msu_images


Processing MSU dataset:  90%|█████████████████  | 18/20 [01:08<00:08,  4.01s/it]

❌ No image file found for base name: user9_row_18_vine_1_cultivar_Cinsault_rep_1_raw_image.jpg in PREVIOUS_DATA/msu_images


Processing MSU dataset: 100%|███████████████████| 20/20 [01:12<00:00,  3.62s/it]
Processing ORIGINAL dataset: 100%|██████████████| 81/81 [07:31<00:00,  5.58s/it]
Processing UCD dataset: 100%|███████████████████| 51/51 [03:43<00:00,  4.39s/it]

✅ All datasets processed and saved to: PREVIOUS_CHECK



