# Organizing and Renaming Image Files for Analysis

Alejandro Sánchez A01662783 A01662783@tec.mx

Carlos Palmieri A01635776 A01635776@tec.mx

Cynthia Quijas A01655996 A01655996@tec.mx

Dabria Carrillo A01656716 A01656716@tec.mx

The "Copy_Images.ipynb" file contains the necessary code to copy images from a set of directories to a destination directory, renaming them according to a specific format that helps in later analysis. The main purpose of this script is to efficiently organize and transform the images, selecting only certain files and renaming them based on their source directory.

In [None]:
import os
import shutil  

from datetime import datetime


## Copy Images

This function copies specific image files from a parent directory to a destination directory.
It renames the files based on the folder structure to facilitate later analysis.

In [None]:
def copy_files(source_directory, destination_directory, search_folder, intermediate_folder):
    """
    Copy selected image files from every subfolder of a parent directory into one destination.

    For each immediate subfolder of ``source_directory`` the function looks in
    ``<subfolder>/derived/<search_folder>/<intermediate_folder>`` and copies the
    files named exactly ``_Phi8.png``, ``_DIC8.png`` or ``_Best.png``. Each copy is
    renamed to ``<subfolder>_<search_folder>_<intermediate_folder>_<original name>``
    (the original names already start with an underscore, so the result contains
    a double underscore) to keep the origin of the image visible for later analysis.

    Args:
        source_directory (str): The path to the parent directory containing subfolders.
        destination_directory (str): The path where files will be copied to; created if missing.
        search_folder (str): The folder inside each subfolder's 'derived' folder where the images are located.
        intermediate_folder (str): An intermediate folder inside the search folder where images are stored.
    """
    # Exact names of the files to copy (a set gives constant-time membership tests)
    file_names = {'_Phi8.png', '_DIC8.png', '_Best.png'}

    # exist_ok avoids the check-then-create race of a separate os.path.exists() test
    os.makedirs(destination_directory, exist_ok=True)

    # Sorted so the copy order (and the printed log) is deterministic
    for folder in sorted(os.listdir(source_directory)):
        folder_path = os.path.join(source_directory, folder)

        # Only directories can contain a 'derived' tree
        if not os.path.isdir(folder_path):
            continue

        derived_folder_path = os.path.join(folder_path, 'derived', search_folder, intermediate_folder)

        # Skip when the image folder is missing; isdir also rejects a plain file
        # at that path, which os.listdir() could not handle
        if not os.path.isdir(derived_folder_path):
            print(f'Folder not found: {derived_folder_path}. Skipping to the next folder.')
            continue

        for file in sorted(os.listdir(derived_folder_path)):
            if file not in file_names:
                continue

            source_file_path = os.path.join(derived_folder_path, file)

            # A directory that happens to carry a target name cannot be copied with copy2
            if not os.path.isfile(source_file_path):
                continue

            # Prefix the name with its origin so files from different folders do not collide
            new_file_name = f"{folder}_{search_folder}_{intermediate_folder}_{file}"
            destination_file_path = os.path.join(destination_directory, new_file_name)

            # copy2 also preserves file metadata such as modification time
            shutil.copy2(source_file_path, destination_file_path)
            print(f'Copied: {source_file_path} to {destination_file_path}')

In [None]:
# Parent directory whose subfolders each contain a 'derived' tree of images
source_directory = r'Z:/Plate_1'  # Update this path as necessary

# Mapping passed to copy_files: each key is used as the search folder and each
# listed value as an intermediate folder inside it
folders = { 
    'A1': ['02_04', '02_03', '03_03', '03_04'],
    'A2': ['02_06', '02_05', '03_06', '03_05'],
    'A3': ['03_06', '03_05', '04_06', '04_05'],
    'B1': ['02_04', '02_05', '03_05', '03_04'],
    'B2': ['01_05', '01_04', '02_05', '02_04'],
    'B3': ['02_04', '02_03', '03_04', '03_03']
}

# Copy the images of every (search folder, intermediate folder) pair into its own
# destination directory
for search_folder, intermediate_folders in folders.items():
    for intermediate_folder in intermediate_folders:
        # Relative path: resolved against the current working directory of the notebook
        destination_directory = rf'./data/processed/MyocyteImages/{search_folder}/{intermediate_folder}'  # Update this path as necessary
        copy_files(source_directory, destination_directory, search_folder, intermediate_folder)

## Delete files outside a specified date range

This function deletes files whose names fall outside a specific date range, keeping only the images needed for processing.

In [None]:
def delete_files_outside_date_range(destination_directory, start_date, end_date):
    """
    Delete files whose leading timestamp falls outside an inclusive date range.

    The timestamp is read from the first two underscore-separated fields of the
    file name, which must form ``YYYY-MM-DD_HH-MM-SS``. Files dated exactly at
    ``start_date`` or ``end_date`` are kept. Entries that are not regular files,
    or whose name does not start with a valid timestamp, are left untouched and
    reported.

    Args:
        destination_directory (str): The directory containing the files to check.
        start_date (str): The start date in the format 'YYYY-MM-DD_HH-MM-SS'.
        end_date (str): The end date in the format 'YYYY-MM-DD_HH-MM-SS'.

    Raises:
        ValueError: If start_date or end_date does not match the expected format.
    """
    date_format = '%Y-%m-%d_%H-%M-%S'

    # Convert the start and end dates to datetime objects
    start_date_dt = datetime.strptime(start_date, date_format)
    end_date_dt = datetime.strptime(end_date, date_format)

    for file in os.listdir(destination_directory):
        file_path = os.path.join(destination_directory, file)

        # os.remove() cannot delete directories, so only regular files are considered
        if not os.path.isfile(file_path):
            print(f"Ignored (not a file): {file}")
            continue

        # The date is expected in the first two '_'-separated fields of the name
        file_date_str = '_'.join(file.split('_')[:2])
        try:
            file_date_dt = datetime.strptime(file_date_str, date_format)
        except ValueError:
            # Keep files that don't start with a valid date
            print(f"Ignored file (invalid date format): {file}")
            continue

        # Delete the file if it's outside the specified (inclusive) date range
        if not start_date_dt <= file_date_dt <= end_date_dt:
            os.remove(file_path)
            print(f"Deleted: {file_path}")

In [None]:
# Same folder mapping as used for copying: every key lists four intermediate folders
folders = { 
    'A1': ['02_04', '02_03', '03_03', '03_04'],
    'A2': ['02_06', '02_05', '03_06', '03_05'],
    'A3': ['03_06', '03_05', '04_06', '04_05'],
    'B1': ['02_04', '02_05', '03_05', '03_04'],
    'B2': ['01_05', '01_04', '02_05', '02_04'],
    'B3': ['02_04', '02_03', '03_04', '03_03']
}

# Date range for file filtering, in the 'YYYY-MM-DD_HH-MM-SS' format expected by
# delete_files_outside_date_range
start_date = '2024-08-14_18-23-48'
end_date = '2024-08-17_00-23-09'

# Visit every destination directory and delete the files dated outside the range
for search_folder, intermediate_folders in folders.items():
    for intermediate_folder in intermediate_folders:
        # Relative path: resolved against the current working directory of the notebook
        destination_directory = rf'./data/processed/MyocyteImages/{search_folder}/{intermediate_folder}'  # Update the path if necessary
        delete_files_outside_date_range(destination_directory, start_date, end_date)


## Splitting data for training

Splits the dataset into training and validation sets by copying files.

In [None]:
import os
import shutil
import random

def split_data(source_folder, destination_folder, train_ratio=0.7, seed=None):
    """
    Splits the dataset into training and validation sets by copying files.

    Only PNG images located directly in ``source_folder`` that have a label file
    with the same base name and a ``.txt`` extension are used; each image is
    copied together with its label. Subfolders are not scanned.

    Args:
        source_folder (str): The path to the source folder containing the dataset.
        destination_folder (str): The path to the destination folder where 'train' and 'val' directories will be created.
        train_ratio (float): The proportion of data to be used for training (default is 0.7).
        seed (int, optional): Seed for a private random generator so the split can be
            reproduced. When None (default) the global ``random`` state is used.

    Raises:
        ValueError: If train_ratio is not between 0 and 1.
    """
    # Reject ratios that would silently produce a meaningless split
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio}")

    # Create destination folders for 'train' and 'val' if they do not exist
    train_folder = os.path.join(destination_folder, 'train')
    val_folder = os.path.join(destination_folder, 'val')
    os.makedirs(train_folder, exist_ok=True)
    os.makedirs(val_folder, exist_ok=True)

    # Pair every PNG file with its TXT label file; sorted so that a seeded shuffle
    # does not depend on the arbitrary order returned by os.listdir()
    file_pairs = []
    for image in sorted(os.listdir(source_folder)):
        if not image.endswith('.png'):
            continue
        # splitext swaps only the final extension, unlike str.replace which would
        # also rewrite '.png' occurring earlier in the name
        label = os.path.splitext(image)[0] + '.txt'
        image_is_file = os.path.isfile(os.path.join(source_folder, image))
        label_is_file = os.path.isfile(os.path.join(source_folder, label))
        if image_is_file and label_is_file:
            file_pairs.append((image, label))

    # Shuffle the file pairs and split them into 'train' and 'val'
    if seed is None:
        random.shuffle(file_pairs)
    else:
        random.Random(seed).shuffle(file_pairs)
    train_limit = int(len(file_pairs) * train_ratio)
    train_pairs = file_pairs[:train_limit]
    val_pairs = file_pairs[train_limit:]

    # Copy each image together with its label into the matching folder
    for target_folder, pairs in ((train_folder, train_pairs), (val_folder, val_pairs)):
        for pair in pairs:
            for name in pair:
                shutil.copy(os.path.join(source_folder, name), os.path.join(target_folder, name))

    # Print out how many image/label pairs were copied to each folder
    print(f"Files copied to 'train': {len(train_pairs)}")
    print(f"Files copied to 'val': {len(val_pairs)}")

# Example usage for paths
# NOTE(review): split_data only reads PNG/TXT files located directly in
# source_folder, while the copy cells above write into
# <search_folder>/<intermediate_folder> subfolders of this path - confirm the
# labelled images are placed at the top level before running this cell.
source_folder = r'./data/processed/MyocyteImages'  # Adjust based on your source images
# 'train' and 'val' are created inside this folder (here the same as the source folder)
destination_folder = r'./data/processed/MyocyteImages'  # Adjust based on where you want to store train and val sets

# Uses the default train_ratio of 0.7
split_data(source_folder, destination_folder)


## Applying filters to the images

In [None]:
import os
import cv2
import logging

# Set up logging on the root logger: INFO level and above, each record prefixed
# with its timestamp and level name
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Function to apply filters to an image
def apply_filters(image_path):
    """
    Load an image in grayscale and run it through a fixed filtering pipeline.

    The pipeline is: fixed-threshold binarization, color inversion, median blur,
    bilateral filter and a final dilation.

    Args:
        image_path (str): Path of the image file to read.

    Returns:
        tuple: ``(images, titles)`` where ``images`` is a one-element list holding the
        dilated image and ``titles`` is ``['Dilated']``. ``(None, None)`` when the
        image cannot be loaded (the error is logged).
    """
    # Load the image in grayscale
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    # cv2.imread returns None instead of raising when the file cannot be read
    if image is None:
        logging.error(f"Error loading image: {image_path}")
        return None, None

    # Binarize with a fixed threshold: pixels above 3 become 255, the rest 0
    # (this is not Otsu's method - cv2.THRESH_OTSU is not passed)
    _, binary_image = cv2.threshold(image, 3, 255, cv2.THRESH_BINARY)

    # Invert the colors of the binary image
    inverted_image = cv2.bitwise_not(binary_image)

    # Apply median filter (aperture 11) to remove salt-and-pepper noise
    salt_image = cv2.medianBlur(inverted_image, 11)

    # Apply bilateral filter to reduce noise while preserving edges
    bilateral_image = cv2.bilateralFilter(salt_image, 50, 90, 90)

    # 3x3 rectangular kernel for the morphological operation
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))

    # Apply dilation to the bilateral-filtered image.
    # NOTE(review): an erosion of bilateral_image used to be computed here but its
    # result was never used, so it was removed without changing the output. If an
    # opening (erode, then dilate the eroded image) was intended, confirm and adjust.
    dilated_image = cv2.dilate(bilateral_image, kernel, iterations=1)

    # Return processed images and titles
    images = [dilated_image]
    titles = ['Dilated']

    return images, titles

# Write each processed image to disk as a 640x640 PNG
def save_images(images, titles, save_path, base_name):
    """
    Save processed images as ``<base_name>_<title>.png`` inside ``save_path``.

    Spaces in a title are replaced by underscores in the file name. Every image
    is resized to 640x640 before it is written. ``None`` entries are skipped with
    a warning, and a failed write is logged as an error.

    Args:
        images (list): Processed images; entries may be None.
        titles (list): Title of each image, matched to ``images`` by position.
        save_path (str): Directory to write into; created if it does not exist.
        base_name (str): Prefix used for every output file name.
    """
    # Create the output directory on first use
    directory_missing = not os.path.exists(save_path)
    if directory_missing:
        os.makedirs(save_path)

    for position, processed in enumerate(images):
        label = titles[position]
        file_name = f"{base_name}_{label.replace(' ', '_')}.png"
        target_path = os.path.join(save_path, file_name)

        # Nothing to write for a missing image
        if processed is None:
            logging.warning(f"Error: Image {label} is None, unable to save.")
            continue

        # Resize image before saving
        written = cv2.imwrite(target_path, cv2.resize(processed, (640, 640)))
        if not written:
            logging.error(f"Failed to save the image: {target_path}")
        else:
            logging.info(f"Image saved: {target_path}")

# Function to iterate through folders and apply filters to all images, saving the results
def apply_filters_and_save_images(base_directory, folders, save_path):
    """
    Filter every PNG image found under the given folder structure and save the results.

    For each ``main_folder`` key and each ``subfolder`` in its list, the PNG files
    in ``base_directory/main_folder/subfolder`` are passed to ``apply_filters`` and
    the returned images are written with ``save_images`` to
    ``save_path/main_folder/subfolder``. Missing directories are logged and skipped.

    Args:
        base_directory (str): Directory containing the main folders.
        folders (dict): Maps each main folder name to a list of subfolder names.
        save_path (str): Root directory for the processed images.
    """
    for main_folder, subfolders in folders.items():
        for subfolder in subfolders:
            complete_directory = os.path.join(base_directory, main_folder, subfolder)

            # isdir also rejects a plain file at this path, which os.listdir() cannot handle
            if not os.path.isdir(complete_directory):
                logging.warning(f"Directory not found: {complete_directory}")
                continue

            # Output mirrors the input layout under save_path
            save_directory = os.path.join(save_path, main_folder, subfolder)

            # Sorted so the processing order (and the log) is deterministic
            for file in sorted(os.listdir(complete_directory)):
                # Only PNG files are treated as images
                if not file.endswith('.png'):
                    continue

                image_path = os.path.join(complete_directory, file)
                logging.info(f"Processing: {image_path}")

                # Apply filters to the image
                images, titles = apply_filters(image_path)

                # apply_filters returns (None, None) when the image cannot be loaded
                if not images:
                    continue

                # Strip only the final extension so names containing other dots stay
                # distinct and do not overwrite each other in the output folder
                base_name = os.path.splitext(file)[0]

                save_images(images, titles, save_directory, base_name)

# Directory structure: main folder -> subfolders whose images are filtered.
# NOTE(review): only B1-B3 are listed here, while the copy and delete cells above
# also cover A1-A3 - confirm this is intentional.
folders = {
    'B1': ['02_04', '02_05', '03_05', '03_04'],
    'B2': ['01_05', '01_04', '02_05', '02_04'],
    'B3': ['02_04', '02_03', '03_04', '03_03']
}

# Base directory where the main folders are located
# NOTE(review): this reads from '../data/raw/...', whereas the copy cells above
# write to './data/processed/...' - verify the paths match where the images are.
base_directory = r'../data/raw/MyocyteImages/'

# Directory to save the processed images (the main folder/subfolder layout is recreated inside it)
save_directory = r'../data/processed/MyocyteImages'

# Apply filters and save the processed images
apply_filters_and_save_images(base_directory, folders, save_directory)

