# Loading the images from the data folder into TensorFlow datasets

In [39]:
import pathlib

# Absolute, machine-specific project path; nothing in this cell reads it.
dataset_url = 'C:/Users/jonat/PycharmProjects/Titanium_Find_Contour_U-Net'

# The source TIFF images are looked up in ./images, relative to the working directory.
images_dir = pathlib.Path('./images')

# Tally the .tif files located directly inside the images folder (non-recursive glob).
image_count = sum(1 for _ in images_dir.glob('*.tif'))
print(f"The dataset contains {image_count} images")

The dataset contains 63 images


In [40]:
# Split the images to train and test folders in the data folder (80% train, 20% test)
import os
import shutil
import numpy as np

# Define the data folder and its train/test sub-folders
data_dir = pathlib.Path('./data')
train_dir = pathlib.Path('./data/train')
test_dir = pathlib.Path('./data/test')

# Create the folders; exist_ok makes this a no-op when they are already there
for folder in (data_dir, train_dir, test_dir):
    os.makedirs(folder, exist_ok=True)

# Stage a copy of every source TIFF in ./data so the files in ./images stay untouched
for file in sorted(os.listdir(images_dir)):
    if file.endswith('.tif'):
        shutil.copy(os.path.join(images_dir, file), os.path.join(data_dir, file))

# Randomly assign each staged image to train (probability 0.8) or test (probability 0.2).
# Every file gets its own independent draw, so the split is only approximately 80/20.
# The listing is sorted because os.listdir returns entries in arbitrary order;
# without a fixed iteration order the seed alone would not make the split reproducible.
np.random.seed(42)
for file in sorted(os.listdir(data_dir)):
    if file.endswith('.tif'):
        # One scalar draw per file decides the destination folder
        destination = train_dir if np.random.rand() < 0.8 else test_dir
        shutil.move(os.path.join(data_dir, file), os.path.join(destination, file))

# Count the number of images in the train and test folders
train_image_count = len(list(train_dir.glob('*.tif')))
test_image_count = len(list(test_dir.glob('*.tif')))
print(f"The train folder contains {train_image_count} images")
print(f"The test folder contains {test_image_count} images")

The train folder contains 50 images
The test folder contains 13 images


In [41]:
# Convert the images from TIFF to PNG
from PIL import Image

def _convert_tif_dir_to_png(directory):
    """Replace every ``.tif`` file directly inside ``directory`` with a PNG.

    Each TIFF is decoded, its pixel data is written next to it as
    ``<name>.png``, and the TIFF is then deleted. Files with any other
    extension are left alone.

    Args:
        directory: Folder whose ``.tif`` files are converted in place.
    """
    for file in os.listdir(directory):
        if not file.endswith('.tif'):
            continue

        tif_path = os.path.join(directory, file)
        # Swap only the trailing extension; a plain str.replace would also
        # rewrite a '.tif' that happens to appear earlier in the file name.
        png_path = os.path.join(directory, os.path.splitext(file)[0] + '.png')

        # The context manager closes the TIFF even when decoding fails, and
        # guarantees it is closed before the file is removed below.
        with Image.open(tif_path) as tif_image:
            # Pull the pixel data into a NumPy array so it outlives the file handle
            pixels = np.array(tif_image)

        # Save the NumPy array as a PNG image, then drop the original TIFF
        Image.fromarray(pixels).save(png_path, "PNG")
        os.remove(tif_path)


# Convert the images in the train folder and in the test folder
for split_dir in (train_dir, test_dir):
    _convert_tif_dir_to_png(split_dir)

In [42]:
# Copy the measurement images that have a matching source image into the masks folder
# and split them into train and test so they mirror the image split
import cv2
import numpy as np
import os

# NOTE(review): the folder name is spelled 'measurments' in this path; presumably it
# matches the folder on disk - confirm before correcting the spelling.
measurements_dir = pathlib.Path('./measurments')

# Define the masks folder and its train/test sub-folders
masks_dir = pathlib.Path('./masks')
train_masks_dir = pathlib.Path('./masks/train')
test_masks_dir = pathlib.Path('./masks/test')

# Create the folders; exist_ok makes this a no-op when they are already there
for folder in (masks_dir, train_masks_dir, test_masks_dir):
    os.makedirs(folder, exist_ok=True)

# File names are compared through str.title() so the match ignores letter case.
# Each folder is listed once into a set instead of once per candidate file.
image_names = {name.title() for name in os.listdir(images_dir)}

# Copy every measurement file whose "<root>_01.tif" counterpart exists in the images folder
for file in os.listdir(measurements_dir):
    root, ext = os.path.splitext(file)
    if root.title() + "_01.Tif" in image_names:
        shutil.copy(os.path.join(measurements_dir, file), os.path.join(masks_dir, file))

# Split the copied files into train and test by matching "<root>_01.png" names
# against the contents of the train and test image folders
train_names = {name.title() for name in os.listdir(train_dir)}
test_names = {name.title() for name in os.listdir(test_dir)}
for file in os.listdir(masks_dir):
    root, ext = os.path.splitext(file)
    image_key = root.title() + "_01.Png"
    if image_key in train_names:
        shutil.move(os.path.join(masks_dir, file), os.path.join(train_masks_dir, file))
    elif image_key in test_names:
        shutil.move(os.path.join(masks_dir, file), os.path.join(test_masks_dir, file))

# Count the number of images in the train and test masks folders
train_mask_count = len(list(train_masks_dir.glob('*.png')))
test_mask_count = len(list(test_masks_dir.glob('*.png')))
print(f"The train masks folder contains {train_mask_count} images")
print(f"The test masks folder contains {test_mask_count} images")

The train masks folder contains 50 images
The test masks folder contains 13 images


In [43]:
# Generate the ground truth masks for the train and test datasets
import cv2
import os
import numpy as np

def generate_mask_from_yellow_line(
    file_path: str,
    lower_yellow: tuple = (20, 100, 100),
    upper_yellow: tuple = (30, 255, 255),
    canny_thresholds: tuple = (400, 600),
) -> "np.ndarray | None":
    """Build a filled mask from the yellow outline drawn on an image.

    Steps:
      1. Threshold the image in HSV space to isolate yellow pixels.
      2. Draw the convex hull of every yellow contour onto a working copy.
      3. Run Canny edge detection on the working copy and dilate the edges.
      4. Take the largest edge contour (by area) and fill its convex hull.

    Args:
        file_path: Path of the image to read with ``cv2.imread``.
        lower_yellow: Inclusive lower HSV bound passed to ``cv2.inRange``.
        upper_yellow: Inclusive upper HSV bound passed to ``cv2.inRange``.
        canny_thresholds: ``(low, high)`` hysteresis thresholds for ``cv2.Canny``.

    Returns:
        An array with the same shape and dtype as the loaded image, set to 255
        in every channel inside the filled hull and 0 elsewhere. ``None`` when
        the image cannot be read, contains no pixels in the yellow range, or
        yields no edge contours; the reason is printed in each case.
    """
    # Read the image; cv2.imread signals failure by returning None
    image = cv2.imread(file_path)
    if image is None:
        print("Error: Could not open or read the image.")
        return None

    # Work on a copy so the hull overlay never touches the loaded pixels
    processed_image = image.copy()

    # Isolate the yellow pixels in HSV space
    hsv = cv2.cvtColor(processed_image, cv2.COLOR_BGR2HSV)
    yellow_mask = cv2.inRange(hsv, np.array(lower_yellow), np.array(upper_yellow))

    contours, _ = cv2.findContours(yellow_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        print("No yellow region found in the image.")
        return None

    # Overlay the convex hull of each yellow contour in green; these strokes
    # are part of the picture the edge detector sees in the next step.
    for contour in contours:
        convex_hull = cv2.convexHull(contour)
        cv2.drawContours(processed_image, [convex_hull], -1, (0, 255, 0), 2)

    # Detect edges on the annotated copy and thicken them with a 2x2 dilation
    low_threshold, high_threshold = canny_thresholds
    edges = cv2.Canny(processed_image, low_threshold, high_threshold)
    edges = cv2.dilate(edges, np.ones((2, 2), np.uint8), iterations=1)

    contours, _ = cv2.findContours(edges, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        print("No contours found in the edges.")
        return None

    # Fill the convex hull of the largest edge contour with 255 in every channel
    largest_contour = max(contours, key=cv2.contourArea)
    hull = cv2.convexHull(largest_contour)
    mask = np.zeros_like(image)
    cv2.fillPoly(mask, [hull], (255, 255, 255))
    return mask

In [44]:
import cv2
directories = [train_masks_dir, test_masks_dir, train_dir, test_dir]

# Crop every image in the masks and data folders to the top-left width x width square.
# NOTE(review): presumably this removes a strip below the square field of view - confirm.
for directory in directories:
    for file in os.listdir(directory):
        path = os.path.join(directory, file)
        img = cv2.imread(path)
        if img is None:
            # cv2.imread returns None instead of raising when it cannot decode a file
            print(f"Skipping unreadable file: {path}")
            continue
        # Axis 1 of the loaded array is the image width
        width = img.shape[1]
        # replace the image with the cropped one
        cv2.imwrite(path, img[0:width, 0:width])

In [45]:
# Generate the ground truth masks for the train and test datasets
mask_directories = [train_masks_dir, test_masks_dir]
for directory in mask_directories:
    for file in os.listdir(directory):
        path = os.path.join(directory, file)
        mask = generate_mask_from_yellow_line(path)
        if mask is None:
            # The generator prints its reason and yields no mask when the image is
            # unreadable or has no usable outline; cv2.imwrite cannot write None,
            # so leave the file as it is and report it.
            print(f"No mask generated for {path}; file left unchanged")
            continue
        # replace the image with the new one
        cv2.imwrite(path, mask)

In [49]:
# Drop the "_01" marker from file names: "<name>_01.png" is renamed to "<name>.png"
for directory in directories:
    for file in os.listdir(directory):
        if not file.endswith('_01.png'):
            continue
        old_path = os.path.join(directory, file)
        new_path = os.path.join(directory, file.replace('_01.png', '.png'))
        os.rename(old_path, new_path)

In [47]:
# Change the resolution of all images in the masks dirs to 4096x4096
mask_size = (4096, 4096)  # (width, height), the order cv2.resize expects
for directory in mask_directories:
    for file in os.listdir(directory):
        path = os.path.join(directory, file)
        img = cv2.imread(path)
        if img is None:
            # cv2.imread returns None instead of raising when it cannot decode a file
            print(f"Skipping unreadable file: {path}")
            continue
        # Nearest-neighbour sampling only copies existing pixel values, so a 0/255 mask
        # stays 0/255; the default bilinear filter would blend gray values along the border.
        img = cv2.resize(img, mask_size, interpolation=cv2.INTER_NEAREST)
        # replace the image with the new one
        cv2.imwrite(path, img)

In [48]:
# Convert every image in the masks and data folders to grayscale, overwriting each file
image_paths = (
    os.path.join(directory, file)
    for directory in directories
    for file in os.listdir(directory)
)
for image_path in image_paths:
    gray = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2GRAY)
    # replace the image with the new one
    cv2.imwrite(image_path, gray)