# **Script to collect annotated images**

In [1]:
from google.colab import drive
drive.mount('/content/drive')
images_folder = "/content/drive/MyDrive"

Mounted at /content/drive


In [2]:
import os
import shutil
import pandas as pd

def collect_images_from_folders(folder_paths, excel_path, output_folder):
    """
    Copies images from given folders to a new folder based on image names from an Excel sheet.

    :param folder_paths: List of 5 folder paths containing images.
    :param excel_path: Path to the Excel sheet containing 'image_name' column.
    :param output_folder: Path to the output folder where matching images will be copied.
    """
    # Ensure output directory exists
    os.makedirs(output_folder, exist_ok=True)

    # Read the Excel file
    df = pd.read_excel(excel_path)

    # Get list of required image names
    image_names = set(df['image'].astype(str))

    # Track found images
    found_images = set()

    # Search in given folders
    for folder in folder_paths:
        if not os.path.exists(folder):
            print(f"Warning: Folder {folder} does not exist.")
            continue

        for file in os.listdir(folder):
            if file in image_names and file not in found_images:
                src_path = os.path.join(folder, file)
                dest_path = os.path.join(output_folder, file)
                shutil.copy2(src_path, dest_path)  # Copy with metadata
                found_images.add(file)

    print(f"Copied {len(found_images)} images to {output_folder}.")
    return found_images

In [3]:
# Example usage:
folder_paths = ["/content/drive/MyDrive/STP_Competition/cropped_images_habiba", '/content/drive/MyDrive/STP_Competition/cropped_images_haneen', '/content/drive/MyDrive/STP_Competition/cropped_images_mohammed',
                '/content/drive/MyDrive/STP_Competition/cropped_images_renad', '/content/drive/MyDrive/STP_Competition/cropped_images_shrouk']
excel_path = '/content/drive/MyDrive/STP_Competition/DataPreparation.xlsx'
output_folder = '/content/drive/MyDrive/STP_Competition/PrescriptionImagesData'
collect_images_from_folders(folder_paths, excel_path, output_folder)


Copied 1300 images to /content/drive/MyDrive/STP_Competition/PrescriptionImagesData.


{'Dental_prescription_544_crop_1.jpg',
 'Neurology_prescription_56_crop_2.jpg',
 'prescription_Gyne_134_crop_15.jpg',
 'Neurology_prescription_58_crop_4.jpg',
 'Dental_prescription_625_crop_9.jpg',
 'prescription_Gyne_358_crop_6.jpg',
 'Dental_prescription_607 (1)_crop_6.jpg',
 'prescription_Gyne_62_crop_6.jpg',
 'prescription_Gyne_134_crop_4.jpg',
 'prescription_Gyne_141_crop_9.jpg',
 'Dental_prescription_605 (1)_crop_7.jpg',
 'prescription_Gyne_334_crop_3.jpg',
 'Neurology_prescription_74_crop_3.jpg',
 'Dental_prescription_611_crop_12.jpg',
 'prescription_Gyne_403_crop_11.jpg',
 'Neurology_prescription_72_crop_5.jpg',
 'prescription_Gyne_419_crop_12.jpg',
 'Neurology_prescription_57_crop_7.jpg',
 'Neurology_prescription_60_crop_2.jpg',
 'prescription_Gyne_419_crop_14.jpg',
 'Dental_prescription_544_crop_2.jpg',
 'prescription_Gyne_141_crop_11.jpg',
 'prescription_Gyne_141_crop_10.jpg',
 'Neurology_prescription_64_crop_3.jpg',
 'Dental_prescription_605 (1)_crop_8.jpg',
 'prescription_