# Prerequisites before execution

In [None]:
# Mount Google Drive into the Colab runtime so that the '/content/drive/...'
# paths used by the parameters below can be read and written.
# NOTE(review): only works inside Google Colab (google.colab is not available elsewhere).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# ...:: PARAMETERS : FILL THEN RUN ALL :::...
# ...:: PATH/NAME SECTION

# Define the directory path of the init RLE (Run Length Encoding) CSV file.
# NOTE(review): this parameter is read by the "INIT FILE LOADING" cell but was
# never defined; the value below is a placeholder - confirm before running.
initRleFileDirectory = '/content/drive/My Drive/Colab Notebooks/'
# Define the name of the init RLE CSV file (expects 'Image_Label' and
# 'EncodedPixels' columns).
# NOTE(review): placeholder value - confirm before running.
initRleFileName = 'train.csv'

# Define the source directory path of the init images
sourceImagesDirectory = '/content/drive/My Drive/Colab Notebooks/train_images/'
# Define the target directory path for computed images
targetImagesDirectory = '/content/drive/My Drive/Colab Notebooks/multiple_train_images_256/'
# Define the target directory path for computed masks
maskImagesDirectory = '/content/drive/My Drive/Colab Notebooks/multiple_train_masks_256/'
# Define the target directory path for created dataset
datasetDirectory = '/content/drive/My Drive/Colab Notebooks/datasets/'
# Define the name of the created dataset
datasetName = 'multiple_256_tag_float32'

# ...:: ALTERNATIVE PATH/NAME SECTION
# Define the target directory path for computed background masks
# targetMasksBkgDirectory = 'C:/Users/arnau/Documents/Formation IA/Projet/multiple_train_masks_filtered_256_bkg/'

# Define the target directory path for computed morphism masks
# targetMasksMorDirectory = 'C:/Users/arnau/Documents/Formation IA/Projet/multiple_train_masks_filtered_256_dilatation/'

# Package Loading

In [None]:
# ...:: NEEDED PACKAGE ::...

import numpy as np
import os
from time import time
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from PIL import Image, ImageFont, ImageDraw, ImageEnhance

import cv2
from scipy.ndimage import zoom
import re

import tensorflow as tf
from tensorflow import keras

# from scipy.ndimage import binary_dilation, binary_fill_holes, binary_opening

# Images and masks computation

In [None]:
# ...:: FUNCTION SECTION ::...

class LightSatImage():
    '''
    Image paired with the RLE (Run Length Encoding) of one of its masks.

    LightSatImage is initialized with a name, an image, a RLE as a string and
    the rank of the label among the labels of the same image.
    Main features are :
      - Resize the image and the mask to a requested width/height
      - Augment image and mask with a flip chosen from the rank
      - Put 0 on mask pixels for which the image value is under a provided
        grayscale value
    '''

    def __init__(self, name, img, rle_string, rank):
        '''
        Instantiate the object.

        name       : name associated with the object (stored, not used by the class itself)
        img        : image as a NumPy array; its shape is kept as the mask source shape
        rle_string : RLE of the mask, "start length start length ..." (starts are 1-based)
        rank       : rank of the label for this image, selects the flip used by augment()
        '''
        self.name = name
        self.img = img
        self.rle_string = rle_string
        self.rank = rank
        # Captured before any resize: the RLE is decoded against this shape
        self.maskShape = self.img.shape

    def augment(self, input):
        '''
        Flip `input` according to the rank and return it as a NumPy array.

        rank 2 -> left/right flip, rank 3 -> up/down flip, rank 4 -> both,
        any other rank -> returned unchanged.
        '''
        if self.rank == 2:
            output = tf.image.flip_left_right(input)
        elif self.rank == 3:
            output = tf.image.flip_up_down(input)
        elif self.rank == 4:
            output = tf.image.flip_left_right(input)
            output = tf.image.flip_up_down(output)
        else:
            output = input

        # TensorFlow ops return tensors; the untouched branch may already be an ndarray
        if isinstance(output, np.ndarray):
            return output
        return output.numpy()

    def naiveWeakValuesRedistribution(self, arr):
        '''
        Binarize `arr` in place: values above 127 become 255, the others 0.

        Used after a zoom/resize, which produces intermediate values.
        Returns the same (modified) array.
        '''
        arr[arr > 127] = 255
        arr[arr <= 127] = 0
        return arr

    def resize_image(self, width, height):
        '''Resize the stored image with OpenCV (replaces self.img).'''
        # INTER_AREA seems the better interpolation for shrinking an image
        self.img = cv2.resize(self.img, (width, height), interpolation=cv2.INTER_AREA)

    def zoom_mask(self, loopMask, width, height):
        '''
        Return `loopMask` zoomed from the source shape to (height, width),
        binarized again to 0/255.
        '''
        zoom_factors = (height / self.maskShape[0], width / self.maskShape[1])
        loopMask = zoom(loopMask, zoom_factors)
        return self.naiveWeakValuesRedistribution(loopMask)

    def rleToBinaryMask(self):
        '''
        Decode the RLE string into a uint8 NumPy array holding 0/255 only.

        The result has the (height, width) of the source image. Runs are
        1-based and counted column by column, hence the final transpose.
        An empty or non-string RLE (e.g. NaN) gives an all-zero mask.
        Raises ValueError when the RLE does not hold (start, length) pairs.
        '''
        # split() without argument ignores repeated / leading / trailing blanks
        tokens = self.rle_string.split() if isinstance(self.rle_string, str) else []
        if len(tokens) % 2:
            raise ValueError("RLE string must contain (start, length) pairs")
        rlePairs = np.array([int(token) for token in tokens], dtype=np.int64).reshape(-1, 2)

        height, width = self.maskShape[0], self.maskShape[1]
        maskBinary = np.zeros(width * height, dtype=np.uint8)

        for index, length in rlePairs:
            first = index - 1  # RLE starts are 1-based
            maskBinary[first:first + length] = 255

        return maskBinary.reshape(width, height).T

    def filterPixelOnMaskUnderValue(self, loopMask, filterGrayScaleValue = 50):
        '''
        Put 0 (in place) on every mask pixel for which the stored image value
        is under `filterGrayScaleValue`, and return the mask.

        The mask and self.img must have the same shape.
        '''
        # Check matching size of the 2 images
        assert self.img.shape == loopMask.shape, "Mask and image shape doesn't match"

        # Pixels of the image darker than the threshold are removed from the mask
        pixels_to_zero = self.img < filterGrayScaleValue
        loopMask[pixels_to_zero] = 0
        return loopMask

    def imageTransform(self, width, height, augment):
        '''
        Resize the stored image, add a trailing channel axis, optionally flip
        it, store the result in self.img and return it.
        '''
        self.resize_image(width, height)
        self.img = self.img[:, :, np.newaxis]
        if augment:
            self.img = self.augment(self.img)
        return self.img

    def generateMaskFromRLE(self, width, height, filterGrayValue, augment):
        '''
        Build the mask from the RLE: decode, zoom to (height, width), add a
        channel axis, filter against self.img, then optionally flip.

        self.img must already have the mask shape (height, width, 1), i.e.
        imageTransform() has to be called first with the same size.
        The filter compares the un-flipped mask with self.img, so self.img
        should not be flipped yet when filterGrayValue is above 0.
        '''
        loopMask = self.rleToBinaryMask()
        loopMask = self.zoom_mask(loopMask, width, height)
        loopMask = loopMask[:, :, np.newaxis]
        loopMask = self.filterPixelOnMaskUnderValue(loopMask, filterGrayValue)
        if augment:
            return self.augment(loopMask)
        return loopMask

    def getImageAndMask(self, width, height, filterGrayValue, augment):
        '''
        ...:: One function to rule them all ::...

        Return the (image, mask) pair resized to (height, width) and, when
        `augment` is true, flipped the same way.
        '''
        # Resize first WITHOUT flipping: the mask filter needs the image in the
        # same orientation as the un-flipped mask.
        self.imageTransform(width, height, False)
        loopMask = self.generateMaskFromRLE(width, height, filterGrayValue, augment)
        if augment:
            self.img = self.augment(self.img)
        return self.img, loopMask

In [None]:
# Class Doc display

# help(LightSatImage)

In [None]:
# ...:: INIT FILE LOADING AND PREPARE ::...

# DATASET: load the RLE CSV file into a DataFrame and prepare it.
# Drop a DataFrame left by a previous run before loading a new one.
if 'df' in locals():
  del df

df = pd.read_csv(os.path.join(initRleFileDirectory, initRleFileName))

print("Init file length : ", len(df))

# 'Image_Label' is "<image file>_<label>": split on the LAST underscore only,
# so an image name containing '_' still gives exactly two columns.
df[['image', 'label']] = df['Image_Label'].str.rsplit('_', n=1, expand=True)
df = df.drop(columns=['Image_Label'])

# Drop the rows holding a NA (no encoded pixels for this image/label pair)
df = df.dropna(axis=0, how='any')

print("DF après suppression des NAs à ", len(df), "lignes")

# Rank of each label among the labels kept for the same image (1, 2, ...),
# stored as an integer.
df['image_label_rank'] = df.groupby('image')['label'].rank(method='first')
df['image_label_rank'] = pd.to_numeric(df['image_label_rank'], downcast='integer')


In [None]:
# ...:: IMAGES AND MASKS PROCESSING ::...
# For every (image, label) row of df: write the image resized to 256x256 as
# .jpg and the matching mask as .npy.

start = time()
i = 0

# cv2.imwrite fails silently and np.save raises when the folder is missing
os.makedirs(targetImagesDirectory, exist_ok=True)
os.makedirs(maskImagesDirectory, exist_ok=True)

# An image has one row per label: write its resized version only once
written_images = set()

for row in df.itertuples(index=False):

    # NAMES HANDLING
    base_name = os.path.splitext(row.image)[0]
    image_name = base_name + '.jpg'
    mask_name = base_name + "_" + row.label + '.npy'

    source_path = os.path.join(sourceImagesDirectory, row.image)
    loopImage = cv2.imread(source_path, cv2.IMREAD_GRAYSCALE)
    if loopImage is None:
        # cv2.imread returns None instead of raising on a missing/unreadable file
        raise FileNotFoundError(f"Unable to read source image: {source_path}")

    # CLASS INSTANTIATION (name, image, RLE string, label rank)
    satImage = LightSatImage(mask_name, loopImage, row.EncodedPixels, row.image_label_rank)

    # IMAGE CREATION
    # Always resize: generateMaskFromRLE needs the resized image stored in satImage
    loopImage = satImage.imageTransform(256, 256, False)
    if image_name not in written_images:
        target_path = os.path.join(targetImagesDirectory, image_name)
        if not cv2.imwrite(target_path, loopImage):
            raise OSError(f"Unable to write image: {target_path}")
        written_images.add(image_name)

    # MASK CREATION
    loopMask = satImage.generateMaskFromRLE(256, 256, 0, False)
    # 0 value for filterGrayValue indicate an unfiltered process
    np.save(os.path.join(maskImagesDirectory, mask_name), loopMask)

    i += 1

    if i%100 == 0:
        print("...:: ", i, " computed images ::...")
        print("Current elpased time :", (time() - start) / 60, " m")

print("Full computing elapsed time:", (time() - start) / 60, " m")

# Alternative Compute (Unused, for record only)

In [None]:
# ...:: BackGround Mask creation ::...

# start = time()
# i = 0
# prefixes = set()

# files = os.listdir(maskImagesDirectory)
# files.sort()

# for file in files:
#     match = re.match(r'([^_]*_[^_]*)_', file)

#     if match:
#         prefixes.add(match.group(1))

# for prefix in list(prefixes):

#     cumulative_array = np.zeros((256, 256, 1), dtype=np.uint8)

#     matching_files = [f for f in files if f.startswith(prefix)]
#     mask_name = prefix + "_Bkg.npy"
#     for s_file in matching_files:
#         data = np.load(os.path.join(maskImagesDirectory, s_file))
#         cumulative_array[data == 255] = np.uint8(255)
#     cumulative_array = np.subtract(np.uint8(255), cumulative_array)
#     np.save(os.path.join(targetMasksBkgDirectory, mask_name), cumulative_array)
#     i += 1

#     if i%100 == 0:
#         print("...:: ", i, " computed images ::...")
#         print("Elapsed time :", (time() - start) / 60, " m")

# print("Full elapsed time :", (time() - start) / 60, " m")

In [None]:
# ...:: Filtered Masks morphisms creation ::...

# fichiers_list = os.listdir(sourceImagesDirectory)
# fichiers_list.sort()

# start = time()
# i = 0

# for fichier in fichiers_list:

#     image = cv2.imread(os.path.join(sourceImagesDirectory, fichier), cv2.IMREAD_GRAYSCALE)

#     for classe in ['Fish','Flower','Gravel','Sugar']:
#         mask_path = os.path.join(sourceMasksDirectory, f"{fichier.replace('.jpg', '')}_{classe}.npy")

#         if os.path.exists(mask_path):
#             mask = np.load(mask_path)
#             mask = binary_dilation(mask, structure=np.ones((6,6,255)))
#             mask = binary_opening(mask, structure=np.ones((2,2,1))).astype(np.uint8)
#             mask = binary_fill_holes(mask)
#             mask = (mask * 255).astype(np.uint8)

#             # Dilation may cause pixel of interest on the black stripe, so update values where init image is black
#             mask[image < 5] = 0

#             target_mask_path = os.path.join(targetMasksMorDirectory, f"{fichier.replace('.jpg', '')}_{classe}.npy")
#             np.save(target_mask_path, mask)

#         i += 1

#         if i%100 == 0:
#           print("...:: ", i, " computed images ::...")
#           print("Elapsed time :", (time() - start) / 60, "m")

# print("Full elapsed time :", (time() - start) / 60, "m")

# Dataset building

In [None]:
start = time()

# Sorted list of the image file names to put in the dataset.
# Only '.jpg' entries are kept: the folder may also hold other entries
# (e.g. a '.ipynb_checkpoints' directory) that cannot be read as images.
fichiers_list = sorted(
    fichier for fichier in os.listdir(targetImagesDirectory)
    if fichier.endswith('.jpg')
)

def generer_image_masques(fichiers):
    """Yield one (image, masks) pair per image file name.

    fichiers : iterable of image file names located in targetImagesDirectory,
               as bytes (what tf.data.Dataset.from_generator passes) or str.

    Yields:
        image   : float32 array (height, width, 1), grayscale values divided by 255
        masques : uint8 array (height, width, 4), one 0/1 channel per class in
                  the order Fish, Flower, Gravel, Sugar; a class without a
                  '<image>_<class>.npy' file in maskImagesDirectory is all zeros.

    Raises FileNotFoundError when an image cannot be read.
    """
    classes = ('Fish', 'Flower', 'Gravel', 'Sugar')

    for i, fichier in enumerate(fichiers, start=1):
        if isinstance(fichier, bytes):
            fichier = fichier.decode()

        image_path = os.path.join(targetImagesDirectory, fichier)
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread returns None instead of raising
            raise FileNotFoundError(f"Unable to read image: {image_path}")
        image = (image.astype(np.float32) / 255.0)[:, :, np.newaxis]

        base_name = os.path.splitext(fichier)[0]
        masques = []

        for classe in classes:
            mask_path = os.path.join(maskImagesDirectory, f"{base_name}_{classe}.npy")

            if os.path.exists(mask_path):
                # Stored masks hold 0/255: scale to 0/1 and drop the channel axis
                masque = np.squeeze((np.load(mask_path) / 255).astype(np.uint8))
            else:
                # Missing class: empty mask with the size of the image
                masque = np.zeros(image.shape[:2], dtype=np.uint8)
            masques.append(masque)

        masques = np.stack(masques, axis=-1)

        if i%100 == 0:
          print("...:: ", i, " computed images ::...")
          print("Current elapsed time :", (time() - start) / 60, " m")

        yield image, masques


# Build the dataset from the generator: each element is a float32 image with
# 1 channel and a uint8 mask stack with 4 channels (sizes left unspecified).
dataset = tf.data.Dataset.from_generator(
    generer_image_masques,
    args=[fichiers_list],
    output_signature=(
        tf.TensorSpec(shape=(None, None, 1), dtype=tf.float32),
        tf.TensorSpec(shape=(None, None, 4), dtype=tf.uint8),
    ),
)

# Write the whole dataset to <datasetDirectory>/<datasetName>
dataset.save(os.path.join(datasetDirectory, datasetName))

elapsed_minutes = (time() - start) / 60
print("Full elapsed time :", elapsed_minutes, " m")

# Alternative dataset building (Unused, for record only)

In [None]:
# ...:: Dataset building with background as a class ::...


# imagesBkgDir = 'C:/Users/arnau/Documents/Formation IA/Projet/multiple_train_augment_masks_256_bkg/'

# start = time()

# fichiers_list = os.listdir(targetImagesDirectory)
# fichiers_list.sort()

# def generer_image_masques(fichiers):

#     i = 0

#     for fichier in fichiers:
#         fichier = fichier.decode()

#         image = cv2.imread(os.path.join(targetImagesDirectory, fichier), cv2.IMREAD_GRAYSCALE)
#         image = tf.cast(image, tf.float32) / 255.0
#         image = image[:, :, np.newaxis]

#         masques = []

#         for classe in ['Fish','Flower','Gravel','Sugar']:
#             mask_path = os.path.join(maskImagesDirectory, f"{fichier.replace('.jpg', '')}_{classe}.npy")

#             if os.path.exists(mask_path):
#                 masque = tf.cast(np.load(mask_path) / 255, tf.uint8)
#                 masque = tf.squeeze(masque)
#                 masques.append(masque)
#             else:
#                 masque = np.zeros((256, 256), dtype=np.uint8)
#                 masques.append(masque)

#         bkg = tf.squeeze(tf.cast(np.load(os.path.join(targetMasksBkgDirectory, f"{fichier.replace('.jpg', '')}_Bkg.npy")) / 255, tf.uint8))
#         masques.append(bkg)
#         masques = np.stack(masques, axis=-1)

#         i += 1

#         if i%100 == 0:
#           print("...:: ", i, " computed images ::...")
#           print("Current elapsed time :", (time() - start) / 60, " m")

#         yield image, masques

# dataset = tf.data.Dataset.from_generator(generer_image_masques, args=[fichiers_list], output_signature=(tf.TensorSpec(shape=(None, None, 1), dtype=tf.float32), tf.TensorSpec(shape=(None, None, 5), dtype=tf.uint8)))
# tf.data.Dataset.save(dataset, os.path.join(datasetDirectory, datasetName))

# print("Full elapsed time :", (time() - start) / 60, " m")