### scan-cropping.ipynb

This notebook loops through all wing scans, segments each scan into the individual wings, and uses OCR to extract the label number.

In [1]:
import matplotlib.pyplot as plt
from pathlib import Path
import numpy as np
import sys
import os
import cv2
import time

from PIL import Image
from scipy.ndimage import rotate

import pytesseract
import easyocr

import warnings

In [28]:
# Define directories
# NOTE: output_dir must end with "/" because later cells build sub-paths by
# plain string concatenation (e.g. output_dir + "OCR/").
input_dir = "/mnt/c/Projects/Master/Data/WingImages/WingScans/"
output_dir = "/mnt/c/Projects/Master/Data/Processed/Scans/1-ScanWingCrops/"

In [21]:
# Functions
def generate_label_mask(scan, threshold=200, min_area=1000000, min_side=1000):
    """
    Finds the labels on the sheet and returns a mask for the image.

    The scan is converted to grayscale and binarised, the external contours
    of the bright regions are collected, and every contour that is large
    enough is written into the mask as a filled bounding rectangle with its
    own id.

    Args:
        scan: Image of the whole sheet (treated as BGR).
        threshold: Gray value above which a pixel counts as label background.
        min_area: Contours with an area up to this value are ignored.
        min_side: Bounding boxes must be wider and higher than this value.

    Returns:
        int32 array with the height and width of the scan. 0 is background,
        accepted labels are numbered 1, 2, ... in the order they are found.
    """
    # Grayscale image and binary threshold
    gray = cv2.cvtColor(scan, cv2.COLOR_BGR2GRAY)
    _, thresholded = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)

    # Find contours
    contours, _ = cv2.findContours(thresholded, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Create an empty mask
    mask = np.zeros(scan.shape[:2], dtype=np.int32)

    # Filter the contours and label them in the mask
    label = 1
    for contour in contours:
        if cv2.contourArea(contour) <= min_area:
            continue

        # Filter based on the width and height of the bounding rectangle
        x, y, w, h = cv2.boundingRect(contour)
        if w > min_side and h > min_side:
            # Fill the mask with a unique label for each contour
            cv2.rectangle(mask, (x, y), (x + w, y + h), label, thickness=cv2.FILLED)
            label += 1

    return mask
    
def easyocr_read_labels(scan, mask, file_base_name, save_ocr_results=False):
    """
    Uses an OCR to read labels. The identified text is returned in a dictionary.

    The function reads the module-level names `reader` (the EasyOCR reader)
    and `allow_list` (the characters the OCR may return); both must be
    defined before it is called.

    Args:
        scan: Image of the whole sheet (treated as BGR).
        mask: Integer mask in which 0 is background and every other value
            marks the region of one label.
        file_base_name: Name of the scan; used to name the validation images.
        save_ocr_results: Falsy to skip the validation images, otherwise the
            directory prefix (including the trailing separator) they are
            written to.

    Returns:
        Dictionary that maps every label id (int) to the text that was read,
        or to "XX" if the OCR returned nothing.
    """
    # Find unique labels except for the background
    unique_labels = np.unique(mask)
    unique_labels = unique_labels[unique_labels != 0]

    # Create a dictionary with keys from unique_labels
    label_dict = {int(label): None for label in unique_labels}

    # Loop through each label and extract the corresponding region
    for label in unique_labels:
        # Rows and columns touched by the current label select its region
        label_region = mask == label
        crop_label = scan[np.ix_(label_region.any(1), label_region.any(0))]
        # Crop a small border in case of tilted images
        crop_label = crop_label[40:crop_label.shape[0]-40, 40:crop_label.shape[1]-40]

        # Crop the number from the label
        # Format: [y1:y2, x1:x2]
        crop_num = crop_label[:, 0:1500]

        # Reduce crop resolution for better OCR performance
        scale = 0.50
        new_width = int(crop_num.shape[1] * scale)
        new_height = int(crop_num.shape[0] * scale)
        crop_num = cv2.resize(crop_num, (new_width, new_height))

        # Perform OCR on the cropped number
        results = reader.readtext(crop_num, allowlist=allow_list)

        if not results:
            print(f"\tWarning: Easyocr did not return any text when reading label {label}!")
            ocr_result = "XX"
        else:
            # Find the result with the highest confidence
            best_result = max(results, key=lambda x: x[2])
            ocr_result = best_result[1]

        # Add a "w" if only one was identified.
        # The previous code prepended an empty string here, so the prefix was
        # never repaired and single-"w" names reached the output files.
        if ocr_result.startswith("w") and not ocr_result.startswith("ww"):
            ocr_result = "w" + ocr_result

        # Add the result to the label dictionary
        label_dict[int(label)] = ocr_result.strip()

        # Save the image and OCR result for easy performance validation
        if save_ocr_results:
            label_name = f"{file_base_name}_Label_{label}"

            # Show the label next to the region that was passed to the OCR
            fig, (ax0, ax1) = plt.subplots(nrows=1, ncols=2, figsize=(7, 2))
            ax0.imshow(cv2.cvtColor(crop_label, cv2.COLOR_BGR2RGB))
            ax0.axis('off')
            ax1.imshow(cv2.cvtColor(crop_num, cv2.COLOR_BGR2RGB))
            ax1.axis('off')
            plt.suptitle(f'{label_name}: "{label_dict[int(label)]}"', fontsize=16)
            plt.savefig(f"{save_ocr_results}{label_name}.jpg")
            plt.close()

    return label_dict

def filter_text_contours(image, contours, threshold=0.2):
    """
    Keeps only the contours whose interior contains few black pixels.

    Printed text produces regions with many zero-valued pixels, so dropping
    those regions removes text that would otherwise be taken for a wing.

    Args:
        image: Image the contours were found in; only zero-valued pixels
            inside a contour count as black.
        contours: Contours to check.
        threshold: A contour is kept while its share of black pixels is
            below this value.

    Returns:
        List with the contours that passed the check, in their input order.
    """
    def black_fraction(cnt):
        # Filled silhouette of the contour
        region = np.zeros(image.shape[:2], dtype=np.uint8)
        cv2.drawContours(region, [cnt], -1, 255, thickness=cv2.FILLED)

        # Image content restricted to the silhouette
        inside = cv2.bitwise_and(image, image, mask=region)

        area = cv2.countNonZero(region)
        return (area - cv2.countNonZero(inside)) / area

    return [cnt for cnt in contours if black_fraction(cnt) < threshold]


def process_and_save_wing(crop, contour, out_file, m=20):
    """
    Crops the wing from the label and saves the image.

    Args:
        crop: Image of the label (treated as BGR) the contour was found in.
        contour: Contour of the wing inside `crop`.
        out_file: Path the cropped wing is written to.
        m: Margin in pixels that is added around the bounding box of the
            wing. It is reduced if the wing is closer than `m` pixels to the
            top or left border of `crop`.
    """
    # The boundaries of the wing are calculated
    x, y, w, h = cv2.boundingRect(contour)

    # A margin larger than x or y would make the slice start negative. NumPy
    # counts negative indices from the far edge, which yields an empty or even
    # a wrong region, so the margin is limited to the space that is available.
    # Slice ends beyond the bottom or right border are clipped by NumPy itself.
    margin = min(m, x, y)
    wing = crop[y - margin:y + h + margin, x - margin:x + w + margin]

    # The cropped wing is saved
    wing = cv2.cvtColor(wing, cv2.COLOR_BGR2RGB)
    Image.fromarray(wing).save(out_file)


def identify_wings(mask, scan, out_files_dir, file_base_name, label_dict):
    """
    Loops through the labels, identifies the wings and saves every wing.

    Args:
        mask: Integer mask in which 0 is background and every other value
            marks the region of one label.
        scan: Image of the whole sheet (treated as BGR).
        out_files_dir: Directory prefix (including the trailing separator)
            the wing images are written to.
        file_base_name: Name of the scan; first part of every file name.
        label_dict: Maps every label id to the text that was read from it.

    The wings of a label are named from left to right: "Only" for a single
    wing, "Left"/"Right" for two wings and "Wing-<n>" for more than two.
    A summary of the wings found per label is printed at the end.
    """
    # Find unique labels except for the background
    unique_labels = np.unique(mask)
    unique_labels = unique_labels[unique_labels != 0]

    # Report number of identified wings for easy validation.
    # The index is the number of wings on a label; index 3 means "more than two".
    labels_by_wing_count = [0, 0, 0, 0]

    # Guarantee a unique wing name
    wing_counter = 1

    # Loop through each label and extract the corresponding region
    for label in unique_labels:
        # Rows and columns touched by the current label select its region
        label_region = mask == label
        crop = scan[np.ix_(label_region.any(1), label_region.any(0))]
        # Crop a small border in case of tilted images
        crop = crop[40:crop.shape[0]-40, 40:crop.shape[1]-40]

        gray_crop = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(gray_crop, 200, 255, cv2.THRESH_BINARY_INV)

        # Find contours
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        large_contours = [cnt for cnt in contours if cv2.contourArea(cnt) > 50000]

        # Filter out contours with a high percentage of black pixels
        filtered_contours = filter_text_contours(gray_crop, large_contours, threshold=0.2)

        # Sort the contours from left to right (by the x-coordinate of their bounding box)
        sorted_contours = sorted(filtered_contours, key=lambda cnt: cv2.boundingRect(cnt)[0])

        num_wings = len(sorted_contours)
        labels_by_wing_count[min(num_wings, 3)] += 1

        # Choose the name suffix for every wing of this label
        if num_wings == 1:
            suffixes = ["Only"]
        elif num_wings == 2:
            suffixes = ["Left", "Right"]
        else:
            suffixes = [f"Wing-{i}" for i in range(1, num_wings + 1)]

        # Crop and save identified wings
        label_text = label_dict[int(label)]
        for contour, suffix in zip(sorted_contours, suffixes):
            out_file = f"{out_files_dir}{file_base_name}_Label_{label_text}_{wing_counter}_{suffix}.jpg"
            process_and_save_wing(crop, contour, out_file)
            wing_counter += 1

    # Print a sheet summary
    print("Sheet summary:")
    print(f"\t{labels_by_wing_count[0]} labels with no wings")
    print(f"\t{labels_by_wing_count[1]} labels with one wing")
    print(f"\t{labels_by_wing_count[2]} labels with two wings")
    print(f"\t{labels_by_wing_count[3]} labels with more than two wings")

In [22]:
# Start a timer
start = time.time()

# Ensure the input directory exists
if not os.path.exists(input_dir):
    raise FileNotFoundError(f"Input directory '{input_dir}' was not found.")

# Create the output directories
os.makedirs(output_dir, exist_ok=True)
ocr_dir = output_dir + "OCR/"
os.makedirs(ocr_dir, exist_ok=True)

# Set up easyocr
# (reader and allow_list are read as globals by easyocr_read_labels)
warnings.filterwarnings("ignore", category=FutureWarning)
reader = easyocr.Reader(['en'])
allow_list = 'w0123456789'

# Find tif files; sorted because os.listdir returns them in arbitrary order
tif_files = sorted(file for file in os.listdir(input_dir) if file.endswith('.tif'))

# Loop through all scans
total_files = len(tif_files)
digits = len(str(total_files))
for i, filename in enumerate(tif_files, 1):
    # Identify file/path components.
    # splitext removes only the extension, so dots inside the name are kept.
    input_file = os.path.join(input_dir, filename)
    file_base_name = os.path.splitext(filename)[0]

    # Every input file gets its own output directory
    out_files_dir = output_dir + file_base_name + "/"

    # Skip if output file exists
    if os.path.exists(out_files_dir):
        print(f"Output already exists. Skipping File {i:0{digits}}/{total_files}:\t{filename}")
        continue

    # Show progress
    print(f"Processing File {i:0{digits}}/{total_files}:\t{filename}")

    # Open the tif file. cv2.imread returns None instead of raising when the
    # file cannot be read; the output directory is not created in that case,
    # so the file is picked up again on the next run.
    scan = cv2.imread(input_file)
    if scan is None:
        print(f"\tWarning: Could not read '{input_file}'. Skipping file.")
        continue

    # Rotate the scan
    scan = rotate(scan, 90)

    # Create output directory
    os.makedirs(out_files_dir)

    # Identify the labels
    mask = generate_label_mask(scan)

    # Identify the text on the label
    label_dict = easyocr_read_labels(scan, mask, file_base_name, save_ocr_results=ocr_dir)

    # Crop the wings and save them in the directory
    identify_wings(mask, scan, out_files_dir, file_base_name, label_dict)

# End the timer
end = time.time()
duration = end - start

# Convert to hours, minutes, and seconds
hours = int(duration // 3600)
minutes = int((duration % 3600) // 60)
seconds = int(duration % 60)

# Print the runtime in hh:mm:ss format
print(f"Runtime (hh:mm:ss): {hours:02}:{minutes:02}:{seconds:02}")

Processing File 1/1:	HiveC5_Sheet_02.tif
Sheet summary:
	0 labels with no wings
	0 labels with one wing
	20 labels with two wings
	0 labels with more than two wings
Runtime (hh:mm:ss): 00:02:19


In [23]:
# Base names (without extension) of output files to remove: text and markings
# that were misidentified as wings
markings = ["HiveC5_Sheet_02_Label_26_58_Wing-1",
            "HiveC5_Sheet_02_Label_26_60_Wing-3",
            "HiveC5_Sheet_02_Label_27_42_Wing-1",
            "HiveC5_Sheet_02_Label_27_44_Wing-3",
            "HiveC5_Sheet_02_Label_28_26_Wing-1",
            "HiveC5_Sheet_02_Label_28_28_Wing-3",
            "HiveC5_Sheet_02_Label_29_11_Wing-1",
            "HiveC5_Sheet_02_Label_31_76_Wing-1",
            "HiveC5_Sheet_02_Label_32_62_Wing-1",
            "HiveC5_Sheet_02_Label_33_46_Wing-1",
            "HiveC5_Sheet_02_Label_33_48_Wing-3",
            "HiveC5_Sheet_02_Label_34_30_Wing-1",
            "HiveC5_Sheet_02_Label_34_32_Wing-3",
            "HiveC5_Sheet_02_Label_35_14_Wing-1",
            "HiveC5_Sheet_02_Label_35_16_Wing-3",
            "HiveC5_Sheet_02_Label_36_1_Wing-1",
            "HiveC5_Sheet_02_Label_37_79_Wing-1",
            "HiveC5_Sheet_02_Label_37_81_Wing-3",
            "HiveC5_Sheet_02_Label_38_65_Wing-1",
            "HiveC5_Sheet_02_Label_38_67_Wing-3",
            "HiveC5_Sheet_02_Label_39_50_Wing-1",
            "HiveC5_Sheet_02_Label_39_52_Wing-3",
            "HiveC5_Sheet_02_Label_40_34_Wing-1",
            "HiveC5_Sheet_02_Label_40_36_Wing-3",
            "HiveC5_Sheet_02_Label_41_18_Wing-1",
            "HiveC5_Sheet_02_Label_41_20_Wing-3",
            "HiveC5_Sheet_02_Label_42_4_Wing-1",
            "HiveC5_Sheet_02_Label_43_83_Wing-1",
            "HiveC5_Sheet_02_Label_43_85_Wing-3",
            "HiveC5_Sheet_02_Label_44_69_Wing-1",
            "HiveC5_Sheet_02_Label_44_71_Wing-3",
            "HiveC5_Sheet_02_Label_45_54_Wing-1",
            "HiveC5_Sheet_02_Label_45_56_Wing-3",
            "HiveC5_Sheet_02_Label_46_38_Wing-1",
            "HiveC5_Sheet_02_Label_46_40_Wing-3",
            "HiveC5_Sheet_02_Label_47_22_Wing-1",
            "HiveC5_Sheet_02_Label_47_24_Wing-3",
            "HiveC5_Sheet_02_Label_48_7_Wing-1",
            "HiveC5_Sheet_02_Label_48_9_Wing-3",
            "HiveC5_Sheet_02_Label_25_73_Wing-1",
            "Hive05_Sheet_01_Label_ww92_20_Only",
            "Hive01_Sheet_03_Label_ww21_1_Left",
            "Hive01_Sheet_02_Label_121_37_Wing-3",
            "Hive07_Sheet_06_Label_0_1_Only",
            "Hive29_Sheet_03_Label_90_41_Only",
            "Hive38_Sheet_03_Label_0_1_Left",
            "Hive28_Sheet_01_Label_17_5_Only",
            "Hive38_Sheet_03_Label_0_2_Right",
            "Hive40_Sheet_03_Label_87_30_Right",
            "Hive06_Sheet_02_Label_54_21_Wing-1",
            "Hive06_Sheet_02_Label_54_24_Wing-4",
            "Hive07_Sheet_01_Label_42_1_Wing-1",
            "Hive07_Sheet_01_Label_42_3_Wing-3",
            "Hive07_Sheet_04_Label_118_23_Wing-1",
            "Hive08_Sheet_04_Label_135_29_Wing-1",
            "Hive08_Sheet_06_Label_57_5_Wing-1",
            "Hive10_Sheet_04_Label_119_7_Wing-1",
            "Hive10_Sheet_05_Label_144_32_Wing-1",
            "Hive39_Sheet_01_Label_22_33_Wing-1",
            "Hive10_Sheet_05_Label_161_3_Wing-1",
            "Hive40_Sheet_04_Label_127_5_Wing-1"]

In [24]:
# Collect every jpg below the output directory (sub-directories included)
out_files = [
    os.path.join(root, name)
    for root, _, names in os.walk(output_dir)
    for name in names
    if name.endswith((".JPG", ".jpg"))
]

# Select the paths whose base name (without extension) is listed in markings
marked_names = set(markings)
files_to_remove = [
    path for path in out_files
    if os.path.splitext(os.path.basename(path))[0] in marked_names
]

# Delete the selected files
for file_path in files_to_remove:
    os.remove(file_path)

In [25]:
# Dictionary of files to rename: maps the current base name (without
# extension) of an output file to its corrected base name
rename_to = {"Hive01_Sheet_02_Label_121_35_Wing-1": "Hive01_Sheet_02_Label_121_35_Left_x",
             "Hive01_Sheet_02_Label_121_36_Wing-2": "Hive01_Sheet_02_Label_121_36_Right_x",
             "Hive01_Sheet_02_Label_w16_21_Left": "Hive01_Sheet_02_Label_ww16_21_Left",
             "Hive01_Sheet_02_Label_w16_22_Right": "Hive01_Sheet_02_Label_ww16_22_Right",
             "Hive01_Sheet_03_Label_ww49_5_Only": "Hive01_Sheet_03_Label_ww49_5_Right",
             "Hive02_Sheet_01_Label_33_21_Only": "Hive02_Sheet_01_Label_33_21_Right",
             "Hive02_Sheet_02_Label_64_35_Only": "Hive02_Sheet_02_Label_64_35_Left",
             "Hive02_Sheet_03_Label_ww19_7_Only": "Hive02_Sheet_03_Label_ww19_7_Right",
             "Hive02_Sheet_03_Label_ww35_20_Only": "Hive02_Sheet_03_Label_ww35_20_Right",
             "Hive03_Sheet_01_Label_31_18_Only": "Hive03_Sheet_01_Label_31_18_Left",
             "Hive03_Sheet_01_Label_115_23_Only": "Hive03_Sheet_01_Label_115_23_Right",
             "Hive03_Sheet_01_Label_ww33_9_Only": "Hive03_Sheet_01_Label_ww33_9_Right",
             "Hive03_Sheet_01_Label_w41_5_Left": "Hive03_Sheet_01_Label_ww41_5_Left",
             "Hive03_Sheet_01_Label_w41_6_Right": "Hive03_Sheet_01_Label_ww41_6_Right",
             "Hive03_Sheet_01_Label_ww7_16_Left": "Hive03_Sheet_01_Label_ww17_16_Left",
             "Hive03_Sheet_01_Label_ww7_17_Right": "Hive03_Sheet_01_Label_ww17_17_Right",
             "Hive05_Sheet_01_Label_118_19_Only": "Hive05_Sheet_01_Label_118_19_Right",
             "Hive05_Sheet_01_Label_w116_3_Left": "Hive05_Sheet_01_Label_ww16_3_Left",
             "Hive05_Sheet_01_Label_w116_4_Right": "Hive05_Sheet_01_Label_ww16_4_Right",
             "Hive06_Sheet_02_Label_54_22_Wing-2": "Hive06_Sheet_02_Label_54_22_Left",
             "Hive06_Sheet_02_Label_54_23_Wing-3": "Hive06_Sheet_02_Label_54_23_Right",
             "Hive06_Sheet_04_Label_127_7_Left": "Hive06_Sheet_04_Label_127_7_Left_x",
             "Hive06_Sheet_04_Label_127_8_Right": "Hive06_Sheet_04_Label_127_8_Right_x",
             "Hive06_Sheet_04_Label_129_35_Left": "Hive06_Sheet_04_Label_129_35_Left_x",
             "Hive06_Sheet_04_Label_129_36_Right": "Hive06_Sheet_04_Label_129_36_Right_x",
             "Hive06_Sheet_04_Label_135_41_Left": "Hive06_Sheet_04_Label_135_41_Left_x",
             "Hive06_Sheet_04_Label_135_42_Right": "Hive06_Sheet_04_Label_135_42_Right_x",
             "Hive06_Sheet_05_Label_129_19_Left": "Hive06_Sheet_05_Label_129_19_Left_x",
             "Hive06_Sheet_05_Label_129_20_Right": "Hive06_Sheet_05_Label_129_20_Right_x",
             "Hive06_Sheet_05_Label_135_21_Left": "Hive06_Sheet_05_Label_135_21_Left_x",
             "Hive06_Sheet_05_Label_135_22_Right": "Hive06_Sheet_05_Label_135_22_Right_x",
             "Hive06_Sheet_05_Label_127_35_Left": "Hive06_Sheet_05_Label_127_35_Left_x",
             "Hive06_Sheet_05_Label_127_36_Right": "Hive06_Sheet_05_Label_127_36_Right_x",
             "Hive07_Sheet_01_Label_42_2_Wing-2": "Hive07_Sheet_01_Label_42_2_Left",
             "Hive07_Sheet_01_Label_42_4_Wing-4": "Hive07_Sheet_01_Label_42_4_Right",
             "Hive07_Sheet_03_Label_101_26_Only": "Hive07_Sheet_03_Label_101_26_Left",
             "Hive07_Sheet_03_Label_112_17_Only": "Hive07_Sheet_03_Label_112_17_Right",
             "Hive07_Sheet_04_Label_118_24_Wing-2": "Hive07_Sheet_04_Label_118_24_Left_x",
             "Hive07_Sheet_04_Label_118_25_Wing-3": "Hive07_Sheet_04_Label_118_25_Right_x",
             "Hive07_Sheet_05_Label_119_33_Left": "Hive07_Sheet_05_Label_119_33_Left_x",
             "Hive07_Sheet_05_Label_119_34_Right": "Hive07_Sheet_05_Label_119_34_Right_x",
             "Hive07_Sheet_06_Label_118_8_Left": "Hive07_Sheet_06_Label_118_8_Left_x",
             "Hive07_Sheet_06_Label_118_9_Right": "Hive07_Sheet_06_Label_118_9_Right_x",
             "Hive08_Sheet_03_Label_XX_7_Only": "Hive08_Sheet_03_Label_7_7_Right",
             "Hive08_Sheet_04_Label_135_30_Wing-2": "Hive08_Sheet_04_Label_135_30_Left",
             "Hive08_Sheet_04_Label_135_31_Wing-3": "Hive08_Sheet_04_Label_135_31_Right",
             "Hive08_Sheet_06_Label_57_6_Wing-2": "Hive08_Sheet_06_Label_57_6_Left",
             "Hive08_Sheet_06_Label_57_7_Wing-3": "Hive08_Sheet_06_Label_57_7_Right",
             "Hive08_Sheet_06_Label_75_30_Only": "Hive08_Sheet_06_Label_75_30_Left",
             "Hive10_Sheet_01_Label_XX_7_Left": "Hive10_Sheet_01_Label_7_7_Left",
             "Hive10_Sheet_01_Label_XX_8_Right": "Hive10_Sheet_01_Label_7_8_Right",
             "Hive10_Sheet_03_Label_87_35_Only": "Hive10_Sheet_03_Label_87_35_Left",
             "Hive10_Sheet_04_Label_119_8_Wing-2": "Hive10_Sheet_04_Label_119_8_Left",
             "Hive10_Sheet_04_Label_119_9_Wing-3": "Hive10_Sheet_04_Label_119_9_Right",
             "Hive10_Sheet_05_Label_144_33_Wing-2": "Hive10_Sheet_05_Label_144_33_Left",
             "Hive10_Sheet_05_Label_144_34_Wing-3": "Hive10_Sheet_05_Label_144_34_Right",
             "Hive10_Sheet_05_Label_161_4_Wing-2": "Hive10_Sheet_05_Label_161_4_Left",
             "Hive10_Sheet_05_Label_161_5_Wing-3": "Hive10_Sheet_05_Label_161_5_Right",
             "Hive11_Sheet_01_Label_1_27_Left": "Hive11_Sheet_01_Label_7_27_Left",
             "Hive11_Sheet_01_Label_1_28_Right": "Hive11_Sheet_01_Label_7_28_Right",
             "Hive12_Sheet_01_Label_23_35_Only": "Hive12_Sheet_01_Label_23_35_Left",
             "Hive12_Sheet_01_Label_1_23_Left": "Hive12_Sheet_01_Label_7_23_Left",
             "Hive12_Sheet_01_Label_1_24_Right": "Hive12_Sheet_01_Label_7_24_Right",
             "Hive13_Sheet_02_Label_ww32_15_Only": "Hive13_Sheet_02_Label_ww32_15_Right",
             "Hive14_Sheet_01_Label_ww18_17_Only": "Hive14_Sheet_01_Label_ww18_17_Left",
             "Hive15_Sheet_02_Label_90_13_Left": "Hive40_Sheet_02_Label_90_13_Left",
             "Hive15_Sheet_02_Label_90_14_Right": "Hive40_Sheet_02_Label_90_14_Right",
             "Hive15_Sheet_02_Label_83_17_Left": "Hive40_Sheet_02_Label_83_17_Left",
             "Hive15_Sheet_02_Label_83_18_Right": "Hive40_Sheet_02_Label_83_18_Right",
             "Hive16_Sheet_01_Label_XX_47_Left": "Hive16_Sheet_01_Label_7_47_Left",
             "Hive16_Sheet_01_Label_XX_48_Right": "Hive16_Sheet_01_Label_7_48_Right",
             "Hive16_Sheet_02_Label_38_11_Only": "Hive16_Sheet_02_Label_38_11_Left",
             "Hive16_Sheet_02_Label_44_1_Left": "Hive16_Sheet_02_Label_ww44_1_Left",
             "Hive16_Sheet_02_Label_44_2_Right": "Hive16_Sheet_02_Label_ww44_2_Right",
             "Hive16_Sheet_02_Label_42_20_Left": "Hive16_Sheet_02_Label_ww42_20_Left",
             "Hive16_Sheet_02_Label_42_21_Right": "Hive16_Sheet_02_Label_ww42_21_Right",
             "Hive17_Sheet_01_Label_XX_15_Left": "Hive17_Sheet_01_Label_7_15_Left",
             "Hive17_Sheet_01_Label_XX_16_Right": "Hive17_Sheet_01_Label_7_16_Right",
             "Hive18_Sheet_01_Label_10_7_Only": "Hive18_Sheet_01_Label_10_7_Left",
             "Hive21_Sheet_01_Label_20_15_Only": "Hive21_Sheet_01_Label_20_15_Left",
             "Hive25_Sheet_01_Label_70_33_Only": "Hive25_Sheet_01_Label_70_33_Right",
             "Hive28_Sheet_02_Label_33_39_Only": "Hive28_Sheet_02_Label_33_39_Left",
             "Hive28_Sheet_03_Label_84_21_Only": "Hive28_Sheet_03_Label_84_21_Right",
             "Hive28_Sheet_03_Label_93_32_Only": "Hive28_Sheet_03_Label_93_32_Left",
             "Hive27_Sheet_01_Label_XX_15_Left": "Hive27_Sheet_01_Label_7_15_Left",
             "Hive27_Sheet_01_Label_XX_16_Right": "Hive27_Sheet_01_Label_7_16_Right",
             "Hive30_Sheet_01_Label_XX_15_Left": "Hive30_Sheet_01_Label_7_15_Left",
             "Hive30_Sheet_01_Label_XX_16_Right": "Hive30_Sheet_01_Label_7_16_Right",
             "Hive38_Sheet_02_Label_44_7_Only": "Hive38_Sheet_02_Label_44_7_Right",
             "Hive39_Sheet_03_Label_104_17_Only": "Hive39_Sheet_03_Label_104_17_Left"
            }

In [27]:
# Rename the files listed in rename_to. The new name always gets the
# extension ".jpg" and stays in the directory of the original file.
for file_path in out_files:
    # Extract the base name without extension
    base_name = os.path.splitext(os.path.basename(file_path))[0]

    # Only files with an entry in the rename_to dictionary are renamed
    new_base_name = rename_to.get(base_name)
    if new_base_name is None:
        continue

    # out_files was collected before the removal step and is not refreshed,
    # so it can still list files that were deleted or already renamed
    # (e.g. when this cell is run a second time)
    if not os.path.exists(file_path):
        continue

    # Construct new file path
    new_file_path = os.path.join(os.path.dirname(file_path), f"{new_base_name}.jpg")

    # Never replace an existing file; os.rename can overwrite it silently
    if os.path.exists(new_file_path):
        print(f"Warning: '{new_file_path}' already exists. Skipping '{file_path}'.")
        continue

    # Rename the file
    os.rename(file_path, new_file_path)