In [10]:
class Test:
    def __init__(self, val2, *args, **kwargs):
        self.val1 = 1
        self.val2 = val2

    def func_test(self, msg: str):
        print(f"salut {msg}")

In [11]:
ts = Test(val1=1, val2=2)

In [12]:
ts.val1

1

In [13]:
ts.val2

2

In [14]:
ts.func_test("moi")

salut moi


In [15]:
class SuperTest(Test):
    def __init__(self, val3, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.val3 = val3

    def func_retest(self, msg2):
        self.func_test(msg2)

In [16]:
ts = SuperTest(val2=2, val3=3)

In [17]:
ts.val1

1

In [18]:
ts.func_retest("remoi")

salut remoi


In [22]:
from typing import List, Tuple

import albumentations as A
import numpy as np
import pandas as pd
import tensorflow as tf


class BasePipeline(object):
    """
    Base class used to create tensor datasets for TensorFlow.

    Inheritance:
        object: The base class of the class hierarchy, used only to enforce WPS306.
        See https://wemake-python-stylegui.de/en/latest/pages/usage/violations/consistency.html#consistency.

    Args:
        n_classes (int): Number of classes in the dataset.
        img_shape (Tuple[int,int,int]): Dimension of the image, format is (H,W,C).
        random_seed (int): Fixed random seed for reproducibility.
    """

    def __init__(
        self,
        n_classes: int,
        img_shape: Tuple[int, int, int],
        random_seed: int,
    ) -> None:
        """Initialization of the class Tensorize.

        Initialize the class, the number of classes in the datasets, the shape of the
        images and the random seed.
        """

        self.n_classes = n_classes
        self.img_shape = img_shape
        self.random_seed = random_seed
        self.AUTOTUNE = tf.data.AUTOTUNE

    def load_images(self, data_frame: pd.DataFrame, column_name: str) -> List[str]:
        """Load the images as a list.

        Take the dataframe containing the observations and the masks and the return the
        column containing the observations as a list.

        Args:
            data_frame (pd.DataFrame): Dataframe containing the dataset.
            column_name (str): The name of the column containing the observations.

        Returns:
            The list of observations deduced from the dataframe.
        """
        return data_frame[column_name].tolist()

    @tf.function
    def parse_image_and_mask(
        self,
        image: str,
        mask: str,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Transform image and mask.

        Parse image and mask to go from path to a resized np.ndarray.

        Args:
            filename (str): The path of the image to parse.
            mask (str): The mask of the image.

        Returns:
            A np.ndarray corresponding to the image and the corresponding one-hot mask.
        """
        resized_dims = [self.img_shape[0], self.img_shape[1]]
        # convert the mask to one-hot encoding
        # decode image
        image = tf.io.read_file(image)
        # Don't use tf.image.decode_image,
        # or the output shape will be undefined
        image = tf.image.decode_jpeg(image)
        # This will convert to float values in [0, 1]
        image = tf.image.convert_image_dtype(image, tf.float32)
        image = tf.image.resize(
            image,
            resized_dims,
            method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
        )

        mask = tf.io.read_file(mask)
        # Don't use tf.image.decode_image,
        # or the output shape will be undefined
        mask = tf.io.decode_png(mask, channels=1)
        mask = tf.image.resize(
            mask,
            resized_dims,
            method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
        )

        return image, mask

    def train_preprocess(
        self,
        image: np.ndarray,
        mask: np.ndarray,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Augmentation preprocess, if needed.

        Args:
            image (np.ndarray): The image to augment.
            mask (np.ndarray): The corresponding mask.

        Returns:
            The augmented pair.
        """

        aug = A.Compose(
            [
                A.HorizontalFlip(p=0.5),
                A.VerticalFlip(p=0.5),
                A.RandomRotate90(p=0.5),
                A.Transpose(p=0.5),
            ],
        )

        augmented = aug(image=image, mask=mask)

        image = augmented["image"]
        mask = augmented["mask"]

        image = tf.cast(x=image, dtype=tf.float32)
        mask = tf.cast(x=mask, dtype=tf.float32)

        return image, mask

    @tf.function
    def apply_augments(
        self,
        image: np.ndarray,
        mask: np.ndarray,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Apply augmentation (roations, transposition, flips), if needed.

        Args:
            image (np.ndarray): A numpy array representing an image of the dataset.
            mask (np.ndarray): A numpy array representing a mask of the dataset.

        Returns:
            An augmented pair (image, mask).
        """

        image, mask = tf.numpy_function(
            func=self.train_preprocess,
            inp=[image, mask],
            Tout=[tf.float32, tf.float32],
        )

        img_shape = [self.img_shape[0], self.img_shape[1], 3]
        mask_shape = [self.img_shape[0], self.img_shape[1], 1]

        image = tf.ensure_shape(image, shape=img_shape)
        mask = tf.ensure_shape(mask, shape=mask_shape)

        return image, mask

    def create_test_dataset(
        self,
        data_path: str,
        batch: int,
        repet: int,
        prefetch: int,
    ) -> tf.data.Dataset:
        """Creation of a tensor dataset for TensorFlow.

        Args:
            data_path (str): Path where the csv file containing the dataframe is
                located.
            batch (int): Batch size, usually 32.
            repet (int): How many times the dataset has to be repeated.
            prefetch (int): How many batch the CPU has to prepare in advance for the
                GPU.
            augment (bool): Does the dataset has to be augmented or no.

        Returns:
            A batch of observations and masks.
        """
        df = pd.read_csv(data_path)
        features = self.load_images(data_frame=df, column_name="filename")
        masks = self.load_images(data_frame=df, column_name="mask")

        dataset = tf.data.Dataset.from_tensor_slices((features, masks))
        dataset = dataset.cache()
        dataset = dataset.shuffle(len(features), seed=self.random_seed)
        dataset = dataset.repeat(repet)
        dataset = dataset.map(
            self.parse_image_and_mask,
            num_parallel_calls=self.AUTOTUNE,
        )
        dataset = dataset.batch(batch)
        return dataset.prefetch(prefetch)

In [23]:
class BaseDataset(BasePipeline):
    def __init__(
        self,
        *args,
        **kwargs,
    ) -> None:
        """Initialization of the class Tensorize.

        Initialize the class, the number of classes in the datasets, the shape of the
        images and the random seed.
        """
        super().__init__(
            *args,
            **kwargs,
        )

    def create_train_dataset(
        self,
        data_path: str,
        batch: int,
        repet: int,
        prefetch: int,
        augment: bool,
    ) -> tf.data.Dataset:
        """Creation of a tensor dataset for TensorFlow.

        Args:
            data_path (str): Path where the csv file containing the dataframe is
                located.
            batch (int): Batch size, usually 32.
            repet (int): How many times the dataset has to be repeated.
            prefetch (int): How many batch the CPU has to prepare in advance for the
                GPU.
            augment (bool): Does the dataset has to be augmented or no.

        Returns:
            A batch of observations and masks.
        """
        df = pd.read_csv(data_path)
        features = self.load_images(data_frame=df, column_name="filename")
        masks = self.load_images(data_frame=df, column_name="mask")

        dataset = tf.data.Dataset.from_tensor_slices((features, masks))
        dataset = dataset.cache()
        dataset = dataset.shuffle(len(features), seed=self.random_seed)
        dataset = dataset.repeat(repet)
        dataset = dataset.map(
            self.parse_image_and_mask,
            num_parallel_calls=self.AUTOTUNE,
        )
        if augment:
            dataset = dataset.map(self.apply_augments, num_parallel_calls=self.AUTOTUNE)
        dataset = dataset.batch(batch)
        return dataset.prefetch(prefetch)

In [27]:
ds = BaseDataset(n_classes=4, img_shape=[224, 224, 3], random_seed=42)

ds_train = ds.create_train_dataset(
    data_path="datas/prepared_dataset/test.csv",
    batch=9,
    repet=1,
    prefetch=1,
    augment=False,
)

In [30]:
ds_train

<PrefetchDataset shapes: ((None, 224, 224, None), (None, 224, 224, 1)), types: (tf.float32, tf.uint8)>

In [35]:
import matplotlib.pyplot as plt

# Let's preview 9 samples from the dataset
image_batch, label_batch = next(iter(ds_train))
plt.figure(figsize=(10, 10))
for i in range(9):
    ax = plt.subplot(3, 3, i + 1)
    plt.title(np.max(label_batch[i]))
    plt.imshow(image_batch[i])
    plt.axis("off")

NotFoundError: /workspaces/segmentation_ecco/datas/raw_dataset/ML/masks/cor1_mask_384_640.png; No such file or directory
	 [[{{node PartitionedCall/ReadFile_1}}]] [Op:IteratorGetNext]