In [32]:
import os
import keras
import numpy as np
from tensorflow import data as tf_data
from tensorflow import image as tf_image
from tensorflow import io as tf_io
from keras import layers
import random
import shutil
import numpy as np
from PIL import Image
from PIL import ImageOps
import onnxruntime as ort
import cv2

In [3]:
# Locations of the input images and of their trimap annotation masks.
target_dir = "./data/segmentation/annotations/trimaps/"
input_dir = "./data/segmentation/images/"

# Shared settings used when building the datasets and the model.
batch_size = 3
num_classes = 3
img_size = (160, 160)

In [4]:
def _visible_files_with_ext(directory, extension):
    """Return the sorted paths of non-hidden files in `directory` ending with `extension`.

    Dot-files are skipped for both images and masks so that the two listings
    are filtered by the same rule.
    """
    return sorted(
        os.path.join(directory, fname)
        for fname in os.listdir(directory)
        if fname.endswith(extension) and not fname.startswith(".")
    )


def _stem(path):
    """Return the file name of `path` without directory and extension."""
    return os.path.splitext(os.path.basename(path))[0]


input_img_paths = _visible_files_with_ext(input_dir, ".jpg")
target_img_paths = _visible_files_with_ext(target_dir, ".png")

# Images and masks are paired purely by their position in the two sorted lists,
# so a missing or extra file would silently pair images with the wrong masks.
# Fail loudly instead.
if len(input_img_paths) != len(target_img_paths):
    raise ValueError(
        f"Found {len(input_img_paths)} images but {len(target_img_paths)} masks"
    )
for input_path, target_path in zip(input_img_paths, target_img_paths):
    if _stem(input_path) != _stem(target_path):
        raise ValueError(f"Image/mask mismatch: {input_path} | {target_path}")

print("Number of samples:", len(input_img_paths))

# Show the first few pairs as a quick visual sanity check.
for input_path, target_path in zip(input_img_paths[:10], target_img_paths[:10]):
    print(input_path, "|", target_path)

Number of samples: 7390
./data/segmentation/images/Abyssinian_1.jpg | ./data/segmentation/annotations/trimaps/Abyssinian_1.png
./data/segmentation/images/Abyssinian_10.jpg | ./data/segmentation/annotations/trimaps/Abyssinian_10.png
./data/segmentation/images/Abyssinian_100.jpg | ./data/segmentation/annotations/trimaps/Abyssinian_100.png
./data/segmentation/images/Abyssinian_101.jpg | ./data/segmentation/annotations/trimaps/Abyssinian_101.png
./data/segmentation/images/Abyssinian_102.jpg | ./data/segmentation/annotations/trimaps/Abyssinian_102.png
./data/segmentation/images/Abyssinian_103.jpg | ./data/segmentation/annotations/trimaps/Abyssinian_103.png
./data/segmentation/images/Abyssinian_104.jpg | ./data/segmentation/annotations/trimaps/Abyssinian_104.png
./data/segmentation/images/Abyssinian_105.jpg | ./data/segmentation/annotations/trimaps/Abyssinian_105.png
./data/segmentation/images/Abyssinian_106.jpg | ./data/segmentation/annotations/trimaps/Abyssinian_106.png
./data/segmentation

In [5]:
def get_dataset(
    batch_size,
    img_size,
    input_img_paths,
    target_img_paths,
    max_dataset_len=None,
):
    """Build a batched dataset of (image, mask) pairs read from disk.

    Args:
        batch_size: Number of samples per batch.
        img_size: Target size passed to the resize op for images and masks.
        input_img_paths: Paths of the input images.
        target_img_paths: Paths of the annotation masks, in the same order.
        max_dataset_len: Optional cap on the number of samples (useful for
            debugging). A falsy value keeps everything.

    Returns:
        A dataset yielding batches of (float32 image, uint8 mask).
    """

    def _read_image(path):
        # Note: the image is NOT rescaled to 0-1; this matters later when the
        # network is used in Unity.
        raw = tf_io.read_file(path)
        decoded = tf_io.decode_png(raw, channels=3)
        resized = tf_image.resize(decoded, img_size)
        return tf_image.convert_image_dtype(resized, "float32")

    def _read_mask(path):
        raw = tf_io.read_file(path)
        decoded = tf_io.decode_png(raw, channels=1)
        # Nearest-neighbour resizing is requested for the label mask.
        resized = tf_image.resize(decoded, img_size, method="nearest")
        labels = tf_image.convert_image_dtype(resized, "uint8")
        # Labels are 1, 2, 3. Subtract one to obtain 0, 1, 2.
        return labels - 1

    def _load_pair(image_path, mask_path):
        return _read_image(image_path), _read_mask(mask_path)

    # Optionally restrict the dataset size for debugging purposes.
    limit = max_dataset_len or None
    image_paths = input_img_paths[:limit]
    mask_paths = target_img_paths[:limit]

    pairs = tf_data.Dataset.from_tensor_slices((image_paths, mask_paths))
    return pairs.map(_load_pair, num_parallel_calls=tf_data.AUTOTUNE).batch(batch_size)

In [7]:
def get_model(img_size, num_classes):
    """Build the encoder-decoder segmentation network with residual connections.

    The encoder halves the resolution four times (one strided convolution plus
    three max-pooling stages) and the decoder doubles it four times.

    Args:
        img_size: Tuple with the spatial size of the input; a 3-channel axis
            is appended to form the input shape.
        num_classes: Number of classes predicted for every pixel.

    Returns:
        A `keras.Model` whose output is a per-pixel softmax over `num_classes`.
    """
    inputs = keras.Input(shape=img_size + (3,))

    # ---- Downsampling path ----

    # Entry block
    x = layers.Conv2D(32, 3, strides=2, padding="same")(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    skip = x  # kept aside for the residual connection

    # Blocks 1, 2, 3 share the same structure but differ in filter count.
    for filters in (64, 128, 256):
        for _ in range(2):
            x = layers.Activation("relu")(x)
            x = layers.SeparableConv2D(filters, 3, padding="same")(x)
            x = layers.BatchNormalization()(x)

        x = layers.MaxPooling2D(3, strides=2, padding="same")(x)

        # Project the residual to the new shape and add it back.
        projected = layers.Conv2D(filters, 1, strides=2, padding="same")(skip)
        x = layers.add([x, projected])
        skip = x

    # ---- Upsampling path ----

    for filters in (256, 128, 64, 32):
        for _ in range(2):
            x = layers.Activation("relu")(x)
            x = layers.Conv2DTranspose(filters, 3, padding="same")(x)
            x = layers.BatchNormalization()(x)

        x = layers.UpSampling2D(2)(x)

        # Upsample and project the residual, then add it back.
        projected = layers.UpSampling2D(2)(skip)
        projected = layers.Conv2D(filters, 1, padding="same")(projected)
        x = layers.add([x, projected])
        skip = x  # becomes the residual of the next block

    # Output layer classifying every individual pixel.
    outputs = layers.Conv2D(num_classes, 3, activation="softmax", padding="same")(x)

    return keras.Model(inputs, outputs)

In [8]:
# Instantiate the segmentation network and print its layer-by-layer summary.
model = get_model(img_size=img_size, num_classes=num_classes)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 160, 160, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 80, 80, 32)   896         ['input_1[0][0]']                
                                                                                                  
 batch_normalization (BatchNorm  (None, 80, 80, 32)  128         ['conv2d[0][0]']                 
 alization)                                                                                       
                                                                                              

In [11]:
# Number of samples held out for validation.
val_samples = 1000

# Both lists are shuffled with identically seeded generators. The resulting
# permutation only keeps image/mask pairs aligned when the lists have the same
# length, so verify that first.
if len(input_img_paths) != len(target_img_paths):
    raise ValueError(
        f"Cannot split: {len(input_img_paths)} images vs "
        f"{len(target_img_paths)} masks"
    )

# Start from the canonical sorted order every time. Shuffling the already
# shuffled lists again (e.g. when this cell is re-run) would otherwise produce
# a different train/validation split on each execution.
input_img_paths = sorted(input_img_paths)
target_img_paths = sorted(target_img_paths)
random.Random(1337).shuffle(input_img_paths)
random.Random(1337).shuffle(target_img_paths)

# The last `val_samples` pairs form the validation set, the rest is for training.
train_input_img_paths = input_img_paths[:-val_samples]
train_target_img_paths = target_img_paths[:-val_samples]
val_input_img_paths = input_img_paths[-val_samples:]
val_target_img_paths = target_img_paths[-val_samples:]

In [12]:
# Training pipeline, capped at 1000 samples to keep experiments short.
train_dataset = get_dataset(
    batch_size=batch_size,
    img_size=img_size,
    input_img_paths=train_input_img_paths,
    target_img_paths=train_target_img_paths,
    max_dataset_len=1000,  # remove to train on the whole dataset
)

# Validation pipeline built from all held-out samples.
valid_dataset = get_dataset(
    batch_size=batch_size,
    img_size=img_size,
    input_img_paths=val_input_img_paths,
    target_img_paths=val_target_img_paths,
)

In [13]:
# The masks hold integer class ids, hence the sparse variant of cross-entropy.
optimizer = keras.optimizers.Adam(learning_rate=1e-4)
model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy")

In [14]:
# Checkpoint the model to disk during training; with save_best_only the file is
# only overwritten when the callback's monitored quantity improves.
checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath="unet_segmentation.keras", save_best_only=True
)
callbacks = [checkpoint_callback]

In [16]:
# Train with per-epoch validation; verbose=2 prints one summary line per epoch.
epochs = 15
model.fit(
    train_dataset,
    validation_data=valid_dataset,
    epochs=epochs,
    callbacks=callbacks,
    verbose=2,
)

Epoch 1/15


Corrupt JPEG data: 240 extraneous bytes before marker 0xd9


334/334 - 76s - loss: 0.6605 - val_loss: 0.6634 - 76s/epoch - 226ms/step
Epoch 2/15


Corrupt JPEG data: 240 extraneous bytes before marker 0xd9


334/334 - 76s - loss: 0.5977 - val_loss: 0.6682 - 76s/epoch - 228ms/step
Epoch 3/15


Corrupt JPEG data: 240 extraneous bytes before marker 0xd9


334/334 - 74s - loss: 0.5387 - val_loss: 0.7231 - 74s/epoch - 220ms/step
Epoch 4/15


Corrupt JPEG data: 240 extraneous bytes before marker 0xd9


334/334 - 77s - loss: 0.4723 - val_loss: 0.7789 - 77s/epoch - 229ms/step
Epoch 5/15


Corrupt JPEG data: 240 extraneous bytes before marker 0xd9


334/334 - 73s - loss: 0.3978 - val_loss: 0.9826 - 73s/epoch - 218ms/step
Epoch 6/15


Corrupt JPEG data: 240 extraneous bytes before marker 0xd9


334/334 - 77s - loss: 0.3382 - val_loss: 1.5963 - 77s/epoch - 231ms/step
Epoch 7/15


Corrupt JPEG data: 240 extraneous bytes before marker 0xd9


334/334 - 70s - loss: 0.3247 - val_loss: 1.1217 - 70s/epoch - 211ms/step
Epoch 8/15


Corrupt JPEG data: 240 extraneous bytes before marker 0xd9


334/334 - 71s - loss: 0.3311 - val_loss: 1.3146 - 71s/epoch - 213ms/step
Epoch 9/15


Corrupt JPEG data: 240 extraneous bytes before marker 0xd9


334/334 - 75s - loss: 0.2958 - val_loss: 0.8880 - 75s/epoch - 224ms/step
Epoch 10/15


Corrupt JPEG data: 240 extraneous bytes before marker 0xd9


334/334 - 68s - loss: 0.2589 - val_loss: 0.9252 - 68s/epoch - 205ms/step
Epoch 11/15


Corrupt JPEG data: 240 extraneous bytes before marker 0xd9


334/334 - 70s - loss: 0.2319 - val_loss: 1.0577 - 70s/epoch - 210ms/step
Epoch 12/15


Corrupt JPEG data: 240 extraneous bytes before marker 0xd9


334/334 - 69s - loss: 0.2198 - val_loss: 0.9929 - 69s/epoch - 208ms/step
Epoch 13/15


Corrupt JPEG data: 240 extraneous bytes before marker 0xd9


334/334 - 68s - loss: 0.2116 - val_loss: 1.0471 - 68s/epoch - 203ms/step
Epoch 14/15


Corrupt JPEG data: 240 extraneous bytes before marker 0xd9


334/334 - 69s - loss: 0.1965 - val_loss: 0.9471 - 69s/epoch - 207ms/step
Epoch 15/15


Corrupt JPEG data: 240 extraneous bytes before marker 0xd9


334/334 - 70s - loss: 0.1832 - val_loss: 1.0826 - 70s/epoch - 208ms/step


<keras.callbacks.History at 0x31411b7f0>

In [20]:
# Export the in-memory model (its state after the last epoch) to a directory,
# creating the directory first if it is missing.
save_model_path = "unet_segmentation_saved_model"
os.makedirs(save_model_path, exist_ok=True)

model.save(save_model_path)



INFO:tensorflow:Assets written to: unet_segmentation_saved_model/assets


INFO:tensorflow:Assets written to: unet_segmentation_saved_model/assets


In [21]:
# Pack the exported model directory into "<save_model_path>.zip" next to it.
shutil.make_archive(base_name=save_model_path, format="zip", root_dir=save_model_path)

'/Users/eryk/Documents/Metody głębokiego uczenia w systemach wizyjnych i wirtualnej rzeczywistości/deep-learning-vr-uni-projects/project01/unet_segmentation_saved_model.zip'

In [25]:
# Convert the exported model directory to ONNX (opset 11) using tf2onnx.
# The resulting unet_segmentation_saved_model.onnx file is loaded with
# onnxruntime further down in this notebook.
# If only the zip archive is available, extract it first with the command below.
# !unzip unet_segmentation_saved_model.zip -d unet_segmentation_saved_model
!python -m tf2onnx.convert --saved-model unet_segmentation_saved_model --opset 11 --output unet_segmentation_saved_model.onnx

2025-03-26 19:01:16,488 - INFO - Signatures found in model: [serving_default].
2025-03-26 19:01:16,489 - INFO - Output names: ['conv2d_8']
2025-03-26 19:01:16,959 - INFO - Using tensorflow=2.12.0, onnx=1.17.0, tf2onnx=1.16.1/15c810
2025-03-26 19:01:16,959 - INFO - Using opset <onnx, 11>
2025-03-26 19:01:17,009 - INFO - Computed 0 values for constant folding
2025-03-26 19:01:17,121 - INFO - Optimizing ONNX model
2025-03-26 19:01:17,422 - INFO - After optimization: BatchNormalization -1 (15->14), Cast -8 (8->0), Concat -8 (8->0), Const -104 (214->110), Identity -2 (2->0), Reshape -6 (6->0), Shape -8 (8->0), Slice -8 (8->0), Squeeze -8 (8->0), Transpose -114 (116->2), Unsqueeze -32 (32->0)
2025-03-26 19:01:17,436 - INFO - 
2025-03-26 19:01:17,436 - INFO - Successfully converted TensorFlow model unet_segmentation_saved_model to ONNX
2025-03-26 19:01:17,436 - INFO - Model inputs: ['input_1']
2025-03-26 19:01:17,436 - INFO - Model outputs: ['conv2d_8']
2025-03-26 19:01:17,436 - INFO - ONNX m

In [26]:
# Settings for the ONNX inference check: input size and converted model file.
img_size = (160, 160)
onnx_path = 'unet_segmentation_saved_model.onnx'

In [27]:
# Load one sample image and shape it into a single-element float32 batch.
# The size comes from `img_size` instead of a duplicated literal. PIL's resize
# expects (width, height), whereas tf.image.resize (used for training data with
# the same `img_size`) expects (height, width), hence the reversal.
img = (
    Image.open("./data/segmentation/images/Abyssinian_1.jpg")
    .convert('RGB')
    .resize(tuple(img_size[::-1]))
)
# Pixel values are left unscaled, as in the training pipeline.
input_img = np.expand_dims(np.asarray(img, dtype="float32"), axis=0)

In [28]:
def display_mask(val_preds):
    """Show the predicted class mask for one prediction in an OpenCV window.

    Args:
        val_preds: Array of per-pixel class scores with the classes on the
            last axis (presumably (height, width, num_classes) — verify
            against the caller).

    The call blocks until a key is pressed, then closes the window.
    """
    # Pick the highest-scoring class per pixel and restore a channel axis.
    mask = np.argmax(val_preds, axis=-1)
    mask = np.expand_dims(mask, axis=-1)
    # Stretch the few class ids over the full intensity range for visibility.
    img = ImageOps.autocontrast(keras.utils.array_to_img(mask))
    img = np.array(img)
    try:
        cv2.imshow('a', img)
        cv2.waitKey()
    finally:
        # Without this the window stays open (and unresponsive) after the key
        # press when running inside a notebook kernel.
        cv2.destroyAllWindows()
        # Give the GUI event loop one tick to actually process the close.
        cv2.waitKey(1)

def display_mask2(val_preds):
    """Show `val_preds` directly as an image in an OpenCV window.

    Args:
        val_preds: Array handed to `cv2.imshow` unchanged (presumably the raw
            per-class network output for one image — verify against the caller).

    The call blocks until a key is pressed, then closes the window.
    """
    try:
        cv2.imshow('a', val_preds)
        cv2.waitKey()
    finally:
        # Without this the window stays open (and unresponsive) after the key
        # press when running inside a notebook kernel.
        cv2.destroyAllWindows()
        # Give the GUI event loop one tick to actually process the close.
        cv2.waitKey(1)

In [None]:
# Run the converted model with ONNX Runtime on every available provider.
available_providers = ort.get_available_providers()
sess_ort = ort.InferenceSession(onnx_path, providers=available_providers)

# Feed the sample batch under the model's first input name; passing None as the
# output list requests all outputs, of which the first is kept.
input_name = sess_ort.get_inputs()[0].name
outputs = sess_ort.run(None, {input_name: input_img})[0]

# Display the prediction for the first (and only) image of the batch.
display_mask2(outputs[0])

[0;93m2025-03-26 19:02:16.779506 [W:onnxruntime:, coreml_execution_provider.cc:112 GetCapability] CoreMLExecutionProvider::GetCapability, number of partitions supported by CoreML: 13 number of nodes in the graph: 73 number of nodes supported by CoreML: 56[m
2025-03-26 19:02:17.745 python[51119:4631707] +[IMKClient subclass]: chose IMKClient_Modern
2025-03-26 19:02:17.745 python[51119:4631707] +[IMKInputSession subclass]: chose IMKInputSession_Modern
