# Data Augmentation for training

Alejandro Sánchez A01662783 A01662783@tec.mx

Carlos Palmieri A01635776 A01635776@tec.mx

Cynthia Quijas A01655996 A01655996@tec.mx

Dabria Carrillo A01656716 A01656716@tec.mx

In [None]:
import os
import cv2
import json

def transform_coordinates_double(coordinates, width, height):
    """
    Maps points through a 90° clockwise rotation followed by a horizontal flip.

    Rotating an image 90° clockwise sends pixel (x, y) to (height - 1 - y, x)
    in an image that is now ``height`` pixels wide. Flipping that result
    horizontally sends the new x back to ``(height - 1) - (height - 1 - y) = y``.
    The two steps therefore compose to a plain transpose: (x, y) → (y, x).

    Args:
        coordinates: Iterable of (x, y) pairs in the original image.
        width: Width of the original image. Not needed by the composed
            mapping; kept so existing callers keep working.
        height: Height of the original image. Kept for the same reason.

    Returns:
        A new list of ``[x, y]`` pairs located in the transformed image.
    """
    # The size-dependent terms of the rotation and of the flip cancel out,
    # so the mapping is independent of the image dimensions.
    return [[y, x] for x, y in coordinates]


def transform_coordinates_crop(coordinates, x_start, y_start, crop_width, crop_height):
    """
    Re-expresses points relative to a crop window, dropping those outside it.

    The window covers ``x_start <= x < x_start + crop_width`` and
    ``y_start <= y < y_start + crop_height``. Points inside it are shifted so
    that the window's top-left corner becomes the origin; all other points
    are omitted from the result.
    """
    # Exclusive right/bottom limits of the window.
    x_end = x_start + crop_width
    y_end = y_start + crop_height

    return [
        [px - x_start, py - y_start]
        for px, py in coordinates
        if x_start <= px < x_end and y_start <= py < y_end
    ]


def data_augmentation(source_dir, destination_dir):
    """
    Writes two augmented variants of every PNG image found in a folder.

    For each ``*.png`` file in ``source_dir`` the function reads the image and,
    when present, the JSON file with the same base name. It then writes into
    ``destination_dir``:

    * ``transformed_<name>.png`` / ``transformed_<name>.json``: the image
      rotated 90° clockwise and then flipped horizontally, with the
      coordinates mapped by ``transform_coordinates_double``.
    * ``cropped_<name>.png`` / ``cropped_<name>.json``: a window half as wide
      and half as tall as the image, starting a quarter of the way in on both
      axes, with the coordinates mapped by ``transform_coordinates_crop``.

    Every other key of the source JSON is copied unchanged into both output
    JSON files. When an image has no JSON file, a warning is printed and the
    outputs contain only an empty ``"coordinates"`` list. Images that OpenCV
    cannot decode are reported and skipped.

    Args:
        source_dir: Folder holding the PNG images and their JSON files.
        destination_dir: Folder that receives the augmented files; created if
            it does not exist.
    """
    # Create destination directory if it doesn't exist
    os.makedirs(destination_dir, exist_ok=True)

    # Number of images processed so far (used only for progress output)
    counter = 0
    for file in os.listdir(source_dir):
        # Only PNG images are augmented
        if not file.endswith('.png'):
            continue

        image_path = os.path.join(source_dir, file)
        base_name = os.path.splitext(file)[0]

        # cv2.imread signals failure by returning None instead of raising
        image = cv2.imread(image_path)
        if image is None:
            print(f"Warning: could not read image {file}. Skipping.")
            continue
        height, width = image.shape[:2]

        # Load the annotations. Start from an empty document when the JSON is
        # missing so that metadata from a previously processed image is never
        # reused and the first image cannot hit an undefined variable.
        json_path = os.path.join(source_dir, f"{base_name}.json")
        if os.path.exists(json_path):
            with open(json_path, 'r') as json_file:
                json_data = json.load(json_file)
        else:
            print(f"Warning: JSON file not found for {file}.")
            json_data = {}
        original_coordinates = json_data.get("coordinates", [])

        # **Transformation 1: Rotation + Horizontal Flip**
        transformed_image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
        transformed_image = cv2.flip(transformed_image, 1)
        transformed_coordinates = transform_coordinates_double(original_coordinates, width, height)

        transformed_image_path = os.path.join(destination_dir, f"transformed_{file}")
        cv2.imwrite(transformed_image_path, transformed_image)

        transformed_json_path = os.path.join(destination_dir, f"transformed_{base_name}.json")
        with open(transformed_json_path, 'w') as json_file:
            json.dump({**json_data, "coordinates": transformed_coordinates}, json_file, indent=4)

        # **Transformation 2: Crop**
        # The window is half the original size on each axis, offset by a quarter
        x_start = width // 4
        y_start = height // 4
        crop_width = width // 2
        crop_height = height // 2

        cropped_image = image[y_start:y_start + crop_height, x_start:x_start + crop_width]
        cropped_coordinates = transform_coordinates_crop(
            original_coordinates, x_start, y_start, crop_width, crop_height
        )

        cropped_image_path = os.path.join(destination_dir, f"cropped_{file}")
        cv2.imwrite(cropped_image_path, cropped_image)

        cropped_json_path = os.path.join(destination_dir, f"cropped_{base_name}.json")
        with open(cropped_json_path, 'w') as json_file:
            json.dump({**json_data, "coordinates": cropped_coordinates}, json_file, indent=4)

        # Increment processed file counter
        counter += 1
        print(f"{counter} Transformations completed for: {file}")


# Example usage: augment every PNG/JSON pair of the segmented dataset.
# NOTE(review): these paths start at './data', while the later cells of this
# notebook use '../data' — confirm which working directory is intended.
source_dir = './data/processed/MyocyteImages/segmented/'  # Input: images and their JSON annotation files
destination_dir = './data/processed/MyocyteImages/segmented_augmented'  # Output: augmented images and JSON files

data_augmentation(source_dir, destination_dir)


In [None]:
import os
import shutil

# Copy every '__Phi8.png' image, together with its '__DIC8.png' and
# '__Best.png' companions when they exist, into the processed folder.

# Directories as per the cookiecutter structure
dir_phi8 = r'../data/raw/MyocyteImages/'
dir_og_images = r'../data/raw/MyocyteImages/'
dir_destino = r'../data/processed/MyocyteImages/'

# Suffix that identifies the Phi8 images driving the copy
phi8_suffix = '__Phi8.png'

# Create the destination directory if it doesn't exist. exist_ok avoids the
# window between a separate existence check and the creation.
os.makedirs(dir_destino, exist_ok=True)

# List files in the Phi8 directory
phi8_files = [f for f in os.listdir(dir_phi8) if f.endswith(phi8_suffix)]

# Process each file in the Phi8 directory
for phi8_file in phi8_files:
    # Strip only the trailing suffix; str.replace would also remove the same
    # text if it appeared earlier in the file name.
    base_name = phi8_file[:-len(phi8_suffix)]

    # Companion images that share the base name
    dic8_file = base_name + '__DIC8.png'
    best_file = base_name + '__Best.png'

    # The Phi8 image is always copied
    shutil.copy(os.path.join(dir_phi8, phi8_file), os.path.join(dir_destino, phi8_file))

    # Companions are optional: copy each one only if it is present
    for companion_file in (dic8_file, best_file):
        companion_path = os.path.join(dir_og_images, companion_file)
        if os.path.exists(companion_path):
            shutil.copy(companion_path, os.path.join(dir_destino, companion_file))


In [None]:
import os
import shutil

# Give the '__Best' and '__DIC8' images the same annotations as their
# '__Phi8' sibling by duplicating the Phi8 JSON under both names.

# Directories as per the cookiecutter structure
dir_origen = r'../data/processed/MyocyteImages/segmented'
dir_imagenes = r'../data/raw/MyocyteImages'  # Assuming images are in this directory

# Only the Phi8 annotation files act as sources
json_files = [f for f in os.listdir(dir_origen) if f.endswith('__Phi8.json')]

for file in json_files:
    # Name shared by every file of the same sample
    base_name = file.replace('__Phi8.json', '')

    # Duplicate only when at least one companion image is present
    dic8_image = base_name + '__DIC8.png'
    best_image = base_name + '__Best.png'
    image_exists = any(
        os.path.exists(os.path.join(dir_imagenes, candidate))
        for candidate in (dic8_image, best_image)
    )
    if not image_exists:
        print(f"No corresponding image found for: {file}. No duplication will occur.")
        continue

    # Both copies are written next to the source JSON
    best_file = base_name + '__Best.json'
    dic8_file = base_name + '__DIC8.json'
    origen_path = os.path.join(dir_origen, file)
    for duplicate in (best_file, dic8_file):
        shutil.copy(origen_path, os.path.join(dir_origen, duplicate))

    print(f"Duplicated files created: {best_file} and {dic8_file}")


In [None]:
import os

# Report images without a JSON file, and JSON files without an image.
# Nothing on disk is modified by this cell.

# Directories as per the cookiecutter structure
dir_files = r'../data/processed/MyocyteImages/segmented'

# Extensions treated as images, and the tags a file stem must end with to
# take part in the pairing check
image_extensions = ('.png', '.jpg')
name_tags = ('__Best', '__DIC8', '__Phi8')

# List all files in the directory
files = os.listdir(dir_files)

# Filter images and JSON files
imagenes = [f for f in files if f.endswith(image_extensions)]  # Filter only images
json_files = [f for f in files if f.endswith('.json')]  # Filter only JSON files

# Process all images
for imagen in imagenes:
    # The partner keeps the full stem and only swaps the extension. This works
    # for every image extension and is unaffected by a tag appearing earlier
    # in the file name.
    stem = os.path.splitext(imagen)[0]
    if not stem.endswith(name_tags):
        continue

    expected_json = stem + '.json'
    if not os.path.exists(os.path.join(dir_files, expected_json)):
        print(f"Missing JSON file for image: {imagen} -> {expected_json}")

# Process all JSON files
for json_file in json_files:
    stem = os.path.splitext(json_file)[0]
    if not stem.endswith(name_tags):
        continue

    # An image with any of the accepted extensions satisfies the pairing
    candidate_images = [stem + extension for extension in image_extensions]
    has_image = any(
        os.path.exists(os.path.join(dir_files, candidate))
        for candidate in candidate_images
    )
    if not has_image:
        print(f"Missing image for JSON file: {json_file} -> {candidate_images[0]}")


In [None]:
import os

# Delete images without a JSON file, and JSON files without an image.
# WARNING: this cell permanently removes files from dir_files.

# Directories as per the cookiecutter structure
dir_files = r'../data/processed/segmented_images_all'

# Extensions treated as images, and the tags a file stem must end with to
# take part in the pairing check
image_extensions = ('.png', '.jpg')
name_tags = ('__Best', '__DIC8', '__Phi8')

# List all files in the directory
files = os.listdir(dir_files)

# Filter images and JSON files
imagenes = [f for f in files if f.endswith(image_extensions)]  # Filter only images
json_files = [f for f in files if f.endswith('.json')]  # Filter only JSON files

# Process all images
for imagen in imagenes:
    # The partner keeps the full stem and only swaps the extension. Deriving it
    # this way keeps '.jpg' images from being deleted unconditionally and
    # yields exactly one decision per file, so a file is never removed twice.
    stem = os.path.splitext(imagen)[0]
    if not stem.endswith(name_tags):
        continue

    expected_json = stem + '.json'
    if not os.path.exists(os.path.join(dir_files, expected_json)):
        print(f"Missing JSON file for image: {imagen} -> {expected_json}")
        os.remove(os.path.join(dir_files, imagen))  # Remove the image
        print(f"Deleted image: {imagen}")

# Process all JSON files
for json_file in json_files:
    stem = os.path.splitext(json_file)[0]
    if not stem.endswith(name_tags):
        continue

    # Keep the JSON file if an image with any accepted extension exists
    candidate_images = [stem + extension for extension in image_extensions]
    has_image = any(
        os.path.exists(os.path.join(dir_files, candidate))
        for candidate in candidate_images
    )
    if not has_image:
        print(f"Missing image for JSON file: {json_file} -> {candidate_images[0]}")
        os.remove(os.path.join(dir_files, json_file))  # Remove the JSON file
        print(f"Deleted JSON file: {json_file}")
