# DataLab task 4

### Moving the masks from one folder into "train_masks" and "val_masks" based on the images folder

In [None]:
import os
import shutil
import cv2
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Dataset location and the image sub-folders that are inspected below
base_folder = r"/Users/celinewu/Documents/GitHub/2024-25b-fai2-adsai-CelineWu231265/datalab_tasks/task_4/dataset"
folders_to_check = ["train_images", "val_images"]

# Show the current contents of each image folder
for folder in folders_to_check:
    folder_path = os.path.join(base_folder, folder)
    if not os.path.exists(folder_path):
        print(f"Folder not found: {folder_path}")
        continue
    print(f"Files in {folder_path}:")
    print(os.listdir(folder_path))

# Filenames (extension included) that are removed from the image folders
files_to_delete = [
    "000_43-2-ROOT1-2023-08-08_pvd_OD0001_f6h1_02-Fish Eye Corrected.png",
    "000_43-18-ROOT1-2023-08-08_pvdCherry_OD001_Col0_01-Fish Eye Corrected.png",
    "008_43-2-ROOT1-2023-08-08_control_pH7_-Fe+B_col0_03-Fish Eye Corrected.png",
    "008_43-17-ROOT1-2023-08-08_pvdCherry_OD001_Col0_01-Fish Eye Corrected.png",
    "019_43-6-ROOT1-2023-08-08_control_pH7_-Fe+B_col0_01-Fish Eye Corrected.png",
    "019_43-19-ROOT1-2023-08-08_pvd_OD001_Col0_03-Fish Eye Corrected.png",
    "023_43-14-ROOT1-2023-08-08_pvdCherry_OD0001_f6h1_03-Fish Eye Corrected.png",
    "023_43-18-ROOT1-2023-08-08_pvd_OD001_f6h1_01-Fish Eye Corrected.png",
]

# Try every listed filename in every folder; report what was deleted or skipped
for folder in folders_to_check:
    folder_path = os.path.join(base_folder, folder)
    if not os.path.exists(folder_path):
        print(f"Folder not found (skipping): {folder_path}")
        continue

    for file_name in files_to_delete:
        file_path = os.path.join(folder_path, file_name)
        if not os.path.exists(file_path):
            print(f"File not found (skipping): {file_path}")
            continue
        os.remove(file_path)
        print(f"Deleted: {file_path}")

print("Deletion process complete.")


In [None]:
# Root of the task; every other folder below is derived from it
root_dir = '/Users/celinewu/Documents/GitHub/2024-25b-fai2-adsai-CelineWu231265/datalab_tasks/task_4'  # Replace with the absolute path to the root folder
dataset_dir = os.path.join(root_dir, 'dataset')
masks_dir = os.path.join(root_dir, 'masks')

# Image folders, followed by the mask folders that sit next to them
train_images_dir = os.path.join(dataset_dir, 'train_images')
val_images_dir = os.path.join(dataset_dir, 'val_images')
train_masks_dir = os.path.join(dataset_dir, 'train_masks')
val_masks_dir = os.path.join(dataset_dir, 'val_masks')

# Make sure both mask destination folders exist (no error if they already do)
for _masks_destination in (train_masks_dir, val_masks_dir):
    os.makedirs(_masks_destination, exist_ok=True)

# Helper function to extract everything before "Corrected" in the filename
def extract_before_corrected(filename):
    """Return the text of *filename* that precedes the first 'Corrected'.

    Trailing '-' and '_' characters are removed from that prefix.
    Returns None when 'Corrected' does not occur in the filename.
    """
    prefix, marker, _ = filename.partition('Corrected')
    if not marker:
        # partition() yields an empty separator when the word is absent
        return None
    return prefix.rstrip('-_')

# Collect the block names (text before "Corrected") of the train and val images.
# extract_before_corrected returns None for filenames without "Corrected"; those
# are dropped here, otherwise a mask that also lacks "Corrected" (block None)
# would "match" and be moved into train_masks by mistake.
image_extensions = ('.png', '.jpg', '.jpeg', '.tif', '.tiff')

train_image_blocks = {
    extract_before_corrected(filename)
    for filename in os.listdir(train_images_dir)
    if filename.endswith(image_extensions)
}
train_image_blocks.discard(None)

val_image_blocks = {
    extract_before_corrected(filename)
    for filename in os.listdir(val_images_dir)
    if filename.endswith(image_extensions)
}
val_image_blocks.discard(None)

# Get the mask filenames from the masks folder (only .tif and .tiff files)
mask_filenames = {filename for filename in os.listdir(masks_dir) if filename.endswith(('.tif', '.tiff'))}

# Print debug info to see what's being matched
print(f"Train image blocks: {train_image_blocks}")
print(f"Val image blocks: {val_image_blocks}")
print(f"Total masks found: {len(mask_filenames)}")

# Move each mask next to the image set it belongs to. The files are moved, not
# copied: after this loop a matched mask no longer exists in masks_dir.
# Sorting only makes the log order reproducible between runs.
for mask_filename in sorted(mask_filenames):
    mask_block = extract_before_corrected(mask_filename)  # Block before "Corrected" in the mask filename
    source_path = os.path.join(masks_dir, mask_filename)

    if mask_block in train_image_blocks:
        dest_path = os.path.join(train_masks_dir, mask_filename)
        shutil.move(source_path, dest_path)
        print(f'Moved {mask_filename} to train_masks/')

    elif mask_block in val_image_blocks:
        dest_path = os.path.join(val_masks_dir, mask_filename)
        shutil.move(source_path, dest_path)
        print(f'Moved {mask_filename} to val_masks/')
    else:
        # Also reached for masks without "Corrected" in their name (block None)
        print(f'No match for mask {mask_filename} (block: {mask_block})')

print('Done moving masks to train_masks and val_masks.')


In [None]:
def preprocess_image(image):
    """Build a binarised single-channel version of *image*.

    Steps: BGR -> grayscale, 5x5 Gaussian blur, inverted Gaussian adaptive
    threshold (block size 11, constant 2), then histogram equalisation.

    Returns a tuple ``(processed, image)`` where ``image`` is the unchanged
    input object.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)
    binary = cv2.adaptiveThreshold(
        smoothed,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        11,
        2,
    )
    return cv2.equalizeHist(binary), image

def detect_edges(gray):
    """Return a Canny edge map of *gray* (thresholds 50/150), closed with a 3x3 kernel."""
    closing_kernel = np.ones((3, 3), np.uint8)
    raw_edges = cv2.Canny(gray, 50, 150)
    # Morphological closing joins small gaps in the detected edges
    return cv2.morphologyEx(raw_edges, cv2.MORPH_CLOSE, closing_kernel)

def find_largest_bounding_box(edges, image_shape, min_size_ratio=0.3):
    """Return the largest-area bounding box among the external contours of *edges*.

    Only boxes whose width and height are at least ``min_size_ratio`` times
    the image width (``image_shape[1]``) and height (``image_shape[0]``) are
    considered. The result is an ``(x, y, w, h)`` tuple, or None when no box
    qualifies. On equal areas the first contour encountered wins.
    """
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    width_floor = min_size_ratio * image_shape[1]
    height_floor = min_size_ratio * image_shape[0]

    rects = (cv2.boundingRect(contour) for contour in contours)
    eligible = [
        (x, y, w, h)
        for x, y, w, h in rects
        if w * h > 0 and w >= width_floor and h >= height_floor
    ]
    # max() keeps the first of several equally large boxes
    return max(eligible, key=lambda rect: rect[2] * rect[3], default=None)

def crop_to_square(image, bbox, padding=5):
    """Crop a square region of *image* centred on *bbox*.

    Args:
        image: array indexed as ``image[row, column, ...]``.
        bbox: ``(x, y, w, h)`` bounding box in pixel coordinates.
        padding: number of pixels added on every side of the box; negative
            values are treated as 0.

    Returns:
        A view of *image* whose height and width are equal. The side length
        is ``max(w, h) + 2 * padding``, capped at the shorter image side.
        When the square would extend past the image border it is shifted
        back inside instead of being truncated, so the crop stays square.
    """
    x, y, w, h = bbox
    image_height, image_width = image.shape[:2]

    # Side of the square: longest box side plus padding on both sides,
    # never larger than what fits inside the image.
    size = min(max(w, h) + 2 * max(padding, 0), image_height, image_width)

    # Centre the square on the box, then shift it back inside the image
    cx, cy = x + w // 2, y + h // 2
    x1 = min(max(0, cx - size // 2), image_width - size)
    y1 = min(max(0, cy - size // 2), image_height - size)

    return image[y1:y1 + size, x1:x1 + size]

def crop_and_replace_images_in_folder(input_folder, min_size_ratio=0.2, padding=30):
    """Crop every image in *input_folder* to a square and overwrite the file.

    For each file with an image extension the image is read, preprocessed,
    edge-detected, and the largest qualifying bounding box is located. When a
    box is found the crop is written back to the same path, replacing the
    original file; otherwise the file is left untouched.

    Args:
        input_folder: folder whose image files are processed in place.
        min_size_ratio: forwarded to ``find_largest_bounding_box``.
        padding: forwarded to ``crop_to_square``.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.tif', '.tiff')

    for filename in os.listdir(input_folder):
        file_path = os.path.join(input_folder, filename)

        # Only files with a known image extension are processed
        if not filename.lower().endswith(image_extensions):
            print(f"{filename} is not an image file.")
            continue

        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread signals an unreadable/corrupt file by returning None;
            # skip it instead of crashing in the colour conversion below.
            print(f"Could not read {filename} (skipping).")
            continue

        gray, original = preprocess_image(image)
        edges = detect_edges(gray)
        bbox = find_largest_bounding_box(edges, gray.shape, min_size_ratio=min_size_ratio)

        if bbox is None:
            print(f"No bounding box detected for {filename}")
            continue

        # Overwrite the original file with the square crop
        cropped = crop_to_square(original, bbox, padding=padding)
        cv2.imwrite(file_path, cropped)
        print(f"Cropped and replaced {filename}.")

# Example usage: folder whose images are cropped in place
input_folder = '/Users/celinewu/Documents/GitHub/2024-25b-fai2-adsai-CelineWu231265/datalab_tasks/task_4/dataset/val_images'  

# Crop every image in the folder; each file is overwritten with its cropped version
crop_and_replace_images_in_folder(input_folder)


In [None]:
# NOTE(review): this path ends in "train_images", which the cells above use as
# a folder (os.listdir); cv2.imread expects an image file — confirm which file
# was meant to be loaded here.
im = cv2.imread("/Users/celinewu/Documents/GitHub/2024-25b-fai2-adsai-CelineWu231265/datalab_tasks/task_4/dataset/train_images")

In [None]:
# Load one "*_seed_mask.tif" file from val_masks and print its dimensions.
# NOTE: despite its name, im_path holds the array returned by cv2.imread, not a path.
im_path = cv2.imread("/Users/celinewu/Documents/GitHub/2024-25b-fai2-adsai-CelineWu231265/datalab_tasks/task_4/dataset/val_masks/030_43-2-ROOT1-2023-08-08_pvdCherry_OD001_Col0_05-Fish Eye Corrected_seed_mask.tif")
# The three-way unpack only works for a 3-dimensional array (height, width, channels)
height, width, channels = im_path.shape
print(width, height, channels)

In [None]:
# Load one "*_root_mask.tif" file from val_masks and print its dimensions.
# NOTE: despite its name, mask_path holds the array returned by cv2.imread, not a path.
mask_path = cv2.imread("dataset/val_masks/031_43-18-ROOT1-2023-08-08_pvd_OD0001_col-0_05-Fish Eye Corrected_root_mask.tif")
# Read the shape of the mask that was just loaded (the previous version
# unpacked im_path.shape and therefore printed the other array's dimensions).
height, width, channels = mask_path.shape
print(width, height, channels)

In [None]:
# Print dtype and min-max value range of the two arrays loaded in the cells above.
# NOTE(review): im_path was read from a "*_seed_mask.tif" file, so the "Image"
# lines describe a mask as well — confirm that this is the intended comparison.
print(f'Image data type: {im_path.dtype}')
print(f'Mask data type: {mask_path.dtype}')
print(f'Image pixel values: {np.min(im_path)}-{np.max(im_path)}')
print(f'Mask pixel values: {np.min(mask_path)}-{np.max(mask_path)}')

In [None]:
def pad_image(image, patch_size=256):
    """
    Zero-pad *image* so that its height and width become multiples of patch_size.

    The padding is split between the two sides of each axis; when the amount
    is odd, the extra pixel goes to the bottom / right. Dimensions that are
    already a multiple of patch_size are left unchanged.
    """
    rows, cols, _ = image.shape if len(image.shape) == 3 else (*image.shape, 1)

    def _round_up(length):
        # Next multiple of patch_size, or length itself when already aligned
        if length % patch_size == 0:
            return length
        return (length // patch_size + 1) * patch_size

    extra_rows = _round_up(rows) - rows
    extra_cols = _round_up(cols) - cols

    top, left = extra_rows // 2, extra_cols // 2
    bottom, right = extra_rows - top, extra_cols - left

    return cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(0, 0, 0))

def extract_patches(image, patch_size=256):
    """
    Return the non-overlapping patch_size x patch_size tiles of *image*.

    Tiles are listed row by row, left to right. Tiles at the bottom/right
    border that are smaller than patch_size in either direction are dropped.
    """
    rows, cols, _ = image.shape if len(image.shape) == 3 else (*image.shape, 1)

    candidates = (
        image[top:top + patch_size, left:left + patch_size]
        for top in range(0, rows, patch_size)
        for left in range(0, cols, patch_size)
    )
    # Keep full-size tiles only
    return [tile for tile in candidates if tile.shape[0] == patch_size and tile.shape[1] == patch_size]

def save_patches(patches, output_folder, image_index):
    """
    Save each patch as ``image{image_index}_patch{NNNN}.png`` in output_folder.

    Patch numbers start at 0001 and follow the order of *patches*. The output
    folder is created when it does not exist yet.

    Raises:
        OSError: if cv2.imwrite reports that a patch could not be written.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(output_folder, exist_ok=True)

    for i, patch in enumerate(patches):
        patch_filename = os.path.join(output_folder, f"image{image_index}_patch{i+1:04d}.png")
        # cv2.imwrite returns False on failure instead of raising; surface it
        # so missing patches are not discovered only later in the pipeline.
        if not cv2.imwrite(patch_filename, patch):
            raise OSError(f"Could not write patch to {patch_filename}")

def process_folder(image_folder, output_folder, patch_size=256):
    """
    Pad every image in image_folder, cut it into patches and save the patches.

    Images are visited in sorted filename order and numbered sequentially
    (image1, image2, ...), so the numbering is the same on every run.
    Files whose extension (compared case-insensitively) is not a known image
    extension are ignored. Files that cannot be read are reported and skipped
    without consuming an image number.

    Args:
        image_folder: folder containing the source images.
        output_folder: folder that receives the patch files.
        patch_size: side length of the square patches.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.tif', '.tiff')
    image_index = 1

    # os.listdir returns entries in arbitrary order; sort for stable numbering
    for filename in sorted(os.listdir(image_folder)):
        if not filename.lower().endswith(image_extensions):
            continue

        image_path = os.path.join(image_folder, filename)
        print(f"Processing image: {filename}")

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None for unreadable files
            print(f"Could not read image (skipping): {filename}")
            continue

        padded_image = pad_image(image, patch_size=patch_size)
        print(f"Padded image size for {filename}: {padded_image.shape}")

        image_patches = extract_patches(padded_image, patch_size=patch_size)
        save_patches(image_patches, output_folder, image_index)
        print(f"Saved patches for {filename} in {output_folder}")

        image_index += 1

    print("All images processed successfully!")

# Input dataset folder and the root folder that receives the patches
base_path = '/Users/celinewu/Documents/GitHub/2024-25b-fai2-adsai-CelineWu231265/datalab_tasks/task_4/dataset'  # Update with the path to your image folder
new_base_path = '/Users/celinewu/Documents/GitHub/2024-25b-fai2-adsai-CelineWu231265/datalab_tasks/task_4/ds'  # Update with the desired output folder

# Source images (val_images) and destination for their patches (val_images1)
image_folder = os.path.join(base_path, 'val_images')  
output_folder = os.path.join(new_base_path, 'val_images1')  

# Side length of the square patches, in pixels
patch_size = 256

# Pad, patch and save every image in the folder
process_folder(image_folder, output_folder, patch_size)


In [None]:
def pad_image(image, patch_size=256):
    """
    Zero-pad *image* so that its height and width become multiples of patch_size.

    Each dimension is rounded up to the next multiple of patch_size. The
    padding is split between both sides of an axis; when the amount is odd,
    the extra pixel goes to the bottom / right.
    """
    rows, cols, _ = image.shape if len(image.shape) == 3 else (*image.shape, 1)

    # Ceiling to the next multiple of patch_size for both axes
    target_rows, target_cols = (
        ((extent + patch_size - 1) // patch_size) * patch_size
        for extent in (rows, cols)
    )

    extra_rows = target_rows - rows
    extra_cols = target_cols - cols
    top, left = extra_rows // 2, extra_cols // 2
    bottom, right = extra_rows - top, extra_cols - left

    return cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(0, 0, 0))


def extract_patches(image, patch_size=256):
    """
    Return the non-overlapping patch_size x patch_size tiles of *image*.

    Tiles are listed row by row, left to right. Start offsets stop early
    enough that a tile never runs past the image border, so incomplete
    border tiles are not produced.
    """
    rows, cols, _ = image.shape if len(image.shape) == 3 else (*image.shape, 1)

    # Last valid start offset on each axis is (extent - patch_size)
    row_starts = range(0, rows - patch_size + 1, patch_size)
    col_starts = range(0, cols - patch_size + 1, patch_size)

    tiles = []
    for top in row_starts:
        bottom = top + patch_size
        for left in col_starts:
            tile = image[top:bottom, left:left + patch_size]
            if tile.shape[0] != patch_size or tile.shape[1] != patch_size:
                continue
            tiles.append(tile)
    return tiles


def save_patches(patches, output_folder, image_index):
    """
    Save each patch as ``image{image_index}_patch{NNNN}.png`` in output_folder.

    Patch numbers start at 0001 and follow the order of *patches*. The output
    folder is created when it does not exist yet.

    Raises:
        OSError: if cv2.imwrite reports that a patch could not be written.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(output_folder, exist_ok=True)

    for i, patch in enumerate(patches):
        patch_filename = os.path.join(output_folder, f"image{image_index}_patch{i+1:04d}.png")
        # cv2.imwrite returns False on failure instead of raising; surface it
        # so missing patches are not discovered only later in the pipeline.
        if not cv2.imwrite(patch_filename, patch):
            raise OSError(f"Could not write patch to {patch_filename}")

def process_folder(image_folder, output_folder, patch_size=256, file_suffix='Corrected_root_mask.tif'):
    """
    Pad every matching file in image_folder, cut it into patches and save them.

    Only files whose name ends with file_suffix are processed. They are
    visited in sorted filename order and numbered sequentially (image1,
    image2, ...), so the numbering is the same on every run. Files that
    cannot be read are reported and skipped without consuming a number.

    Args:
        image_folder: folder containing the source files.
        output_folder: folder that receives the patch files.
        patch_size: side length of the square patches.
        file_suffix: filename ending (a string or tuple of strings) that
            selects the files to process; defaults to the root-mask suffix.
    """
    image_index = 1

    # os.listdir returns entries in arbitrary order; sort for stable numbering
    for filename in sorted(os.listdir(image_folder)):
        if not filename.endswith(file_suffix):
            continue

        image_path = os.path.join(image_folder, filename)
        print(f"Processing image: {filename}")

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None for unreadable files
            print(f"Could not read image (skipping): {filename}")
            continue

        padded_image = pad_image(image, patch_size=patch_size)
        print(f"Padded image size for {filename}: {padded_image.shape}")

        image_patches = extract_patches(padded_image, patch_size=patch_size)

        # Save the patches with the new naming convention
        save_patches(image_patches, output_folder, image_index)
        print(f"Saved patches for {filename} in {output_folder}")

        image_index += 1

    print("All images processed successfully!")

# Input dataset folder and the root folder that receives the patches
base_path = '/Users/celinewu/Documents/GitHub/2024-25b-fai2-adsai-CelineWu231265/datalab_tasks/task_4/dataset'  
new_base_path = '/Users/celinewu/Documents/GitHub/2024-25b-fai2-adsai-CelineWu231265/datalab_tasks/task_4/ds'  

# Source masks (val_masks) and destination for their patches (val_masks1)
image_folder = os.path.join(base_path, 'val_masks')  
output_folder = os.path.join(new_base_path, 'val_masks1')  

# Side length of the square patches, in pixels
patch_size = 256

# Pad, patch and save every matching mask in the folder
process_folder(image_folder, output_folder, patch_size)


In [None]:
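# TODO (planned pipeline steps):
# 1. Crop.
# 2. Get the size.
# 3. Combine the masks.
# 4. Apply the size to the padding.
# 5. Patch using the padding size (256).
# 6. Save the patches into X and y; there should be 9 patches per image.
# 7. Check the images and masks.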