### 1-4-ScansWingOrientation.ipynb

Flips upside-down wings by detecting the black color at the top and base of the wing.

In [1]:
import numpy as np
import cv2
import sys
import os

from PIL import Image
from pathlib import Path
from tqdm import tqdm

import matplotlib.pyplot as plt

In [2]:
# Define data directories
data_dir = Path("/mnt/g/Projects/Master/Data/")

input_dir = data_dir / "Processed" / "WingScans" / "3-ScanWingsAligned" / "Wings"
output_dir = data_dir / "Processed" / "WingScans" / "4-ScanWingsFlipped" 

DEBUG = False

In [3]:
def find_black_area(image, window_size):
    h, w = image.shape
    max_density = -1
    best_coords = (0, 0)

    # Slide the window over the image
    for y in range(0, h - window_size[1] + 1, 1):
        for x in range(0, w - window_size[0] + 1, 1):
            # Extract the window from the image
            window = image[y:y + window_size[1], x:x + window_size[0]]

            # Count the number of black pixels (assuming black pixels are 0)
            black_pixel_count = np.sum(window == 0)

            # Track the window with the maximum number of black pixels
            if black_pixel_count > max_density:
                max_density = black_pixel_count
                best_coords = (x+window_size[0]/2, y+window_size[1]/2)

    return best_coords

In [4]:
try:
    # Ensure the input directory exists
    if not os.path.exists(input_dir):
        raise FileNotFoundError(f"Input directory '{input_dir}' was not found.")
    
    # Create the output directories
    if os.path.exists(output_dir):
        print("WARNING: Output directory already exists.") 
    os.makedirs(output_dir, exist_ok=True)

    # Images where manual correction is necessary
    to_flip_lr = ["Hive28_Sheet_02_Label_57_4_Right", 
                  "Hive28_Sheet_03_Label_72_6_Right", 
                  "Hive36_Sheet_03_Label_79_24_Right", 
                  "Hive40_Sheet_04_Label_132_20_Left"]
    to_flip_tb = ["Hive02_Sheet_01_Label_33_21_Right",
                  "Hive05_Sheet_01_Label_ww94_15_Left", 
                  "Hive06_Sheet_04_Label_141_40_Right",
                  "Hive07_Sheet_01_Label_39_28_Right", 
                  "Hive09_Sheet_03_Label_82_37_Left", 
                  "Hive09_Sheet_03_Label_110_1_Left", 
                  "Hive09_Sheet_04_Label_135_34_Right", 
                  "Hive09_Sheet_05_Label_161_10_Right",
                  "Hive10_Sheet_03_Label_110_10_Right", 
                  "Hive25_Sheet_01_Label_30_17_Left", 
                  "Hive28_Sheet_03_Label_65_30_Left", 
                  "Hive40_Sheet_04_Label_127_7_Right", 
                  "HiveC4_Sheet_01_Label_48_1_Left"]
    
    # Loop through all jpg files
    jpg_files = tif_files = [file for file in os.listdir(input_dir) if file.endswith(".jpg")]
    for filename in tqdm(jpg_files, desc="Processing files", ncols=145):
        input_path = input_dir / filename
        output_path =  output_dir / filename

        # Skip if output file exists
        if os.path.exists(output_path):
            continue   
            
        # Load image
        image = cv2.imread(input_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    
        # Grayscale image
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    
        # Apply Gaussian Blur
        blurred_image = cv2.GaussianBlur(gray, (5, 5), 0)
    
        # Find a threshold with less than 3% black area
        threshold = 60
        while threshold >= 5:
            # Apply thresholding to get a binary image 
            _, thresh = cv2.threshold(blurred_image, threshold, 255, cv2.THRESH_BINARY)
            
            # Calculate the percentage of black pixels
            total_pixels = thresh.size
            black_pixels = np.count_nonzero(thresh == 0)
            black_percentage = (black_pixels / total_pixels) * 100
    
            # Use this threshold if less than 3% of pixels are black
            if black_percentage < 3:
                break
                
            # Decrease threshold until less than 3% of pixels are black
            threshold -= 5
        
        window_size = (50, 50)
        cord_x, cord_y = find_black_area(thresh, window_size)
        height, width, _ = image.shape
        
        if DEBUG:
            print(f"\nThreshold: {threshold}")
            print(f"Black Pixel Percentage: {black_percentage:.2f}%")
    
            # Get the middle coordinate of the image
            middle_x = width // 2
            middle_y = height // 2
            
            # Create a 1x2 figure
            plt.figure(figsize=(5, 5))
            plt.imshow(image)
            plt.axis("on")
            plt.tight_layout()
            plt.show()
            # Image 2
            plt.figure(figsize=(5, 5))
            plt.imshow(thresh, cmap="gray")
            plt.scatter(cord_x, cord_y, c="red", s=200, marker=".")
            plt.axvline(x=middle_x, c="red", linestyle="--", linewidth=2)
            plt.axhline(y=middle_y, c="red", linestyle="--", linewidth=2)
            plt.axis("on")
            plt.tight_layout()
            plt.show()
        # If the black area is on the right the image has to be flipped top to bottom
        basename = filename.removesuffix(".jpg")
        image = Image.fromarray(image)
        if cord_y > height/2:
            image = image.transpose(method=Image.FLIP_TOP_BOTTOM)
            if DEBUG:
                print(f"Flipped {basename} top to bottom") 
        if basename in to_flip_tb:
            image = image.transpose(method=Image.FLIP_TOP_BOTTOM)
        # If the black area is on the bottom the image has to be flipped left to right
        if cord_x > width/2:
            image = image.transpose(method=Image.FLIP_LEFT_RIGHT)
            if DEBUG:
                print(f"Flipped {basename} left to right")
        if basename in to_flip_lr:
            image = image.transpose(method=Image.FLIP_LEFT_RIGHT)
        image.save(output_path)

# Handle exceptions
except FileNotFoundError as e:
    print(e)
    
except KeyboardInterrupt:
    pass

Processing files: 100%|████████████████████████████████████████████████████████████████████████████████████| 4380/4380 [1:36:07<00:00,  1.32s/it]
