# U-Net segmentation on the Carvana dataset

## Colab-only code

In [1]:
# Detect whether the notebook runs inside Google Colab: the `google.colab`
# package is importable there. Only ImportError is caught so that unrelated
# failures (and KeyboardInterrupt) are not silently swallowed.
try:
    import google.colab  # noqa: F401  (imported only as an availability probe)
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

In [6]:
# Colab only: switch the working directory to the project folder so that
# relative paths used later resolve against it, then echo the new location.
if IN_COLAB:
    import os
    from pathlib import Path

    # Hard-coded project location (a Google Drive path under /content/drive).
    # `project_dir` is also read by a later Colab-only cell.
    project_dir = r'/content/drive/MyDrive/Colab Notebooks/unet0 carvana/unet_segmentation'
    os.chdir(project_dir)
    print(os.getcwd())

/content/drive/MyDrive/Colab Notebooks/unet0 carvana/unet_segmentation


In [7]:
# Colab only: hand the project directory to Colab's `files.view` helper
# (presumably opens it in the Colab file browser — TODO confirm).
# Relies on `project_dir` being set by the earlier Colab-only cell.
if IN_COLAB:
    from google.colab import files
    files.view(project_dir)

<IPython.core.display.Javascript object>

In [8]:
# Colab only: put the project's `src` folder on the import path so the
# notebook's `configs` / `utils` imports can be resolved from there.
if IN_COLAB:
    import sys

    src_dir = '/content/drive/MyDrive/Colab Notebooks/unet0 carvana/unet_segmentation/src'
    # Guard against duplicates so re-running this cell stays idempotent.
    if src_dir not in sys.path:
        sys.path.append(src_dir)

In [4]:
# NOTE(review): the output saved with this cell is `git status` output, so the
# command was probably edited after it was last run. `git add` without a
# pathspec stages nothing — confirm which command is intended.
!git add

Refresh index: 100% (19/19), done.
On branch unet-segmentation
Your branch is up to date with 'origin/unet-segmentation'.

Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git restore <file>..." to discard changes in working directory)
	[31mmodified:   src/configs/model_config.py[m
	[31mmodified:   src/configs/training_config.py[m
	[31mmodified:   src/models.py[m
	[31mmodified:   src/utils/io_utils.py[m

Untracked files:
  (use "git add <file>..." to include in what will be committed)
	[31msrc/train_colab.ipynb[m
	[31msrc/unet_train_colab.ipynb[m

no changes added to commit (use "git add" and/or "git commit -a")


In [14]:
# Show the working-tree diff for the Colab training notebook.
!git diff src/unet_train_colab.ipynb

In [None]:
!pip install --upgrade keras



## Imports

In [None]:
from pathlib import Path

import numpy as np
from skimage.io import imread
import keras
from keras.models import model_from_json
from keras.callbacks import ModelCheckpoint, TensorBoard
import tensorflow as tf
from configs import io_config, model_config, training_config, ds_prepare_config
from configs.training_config import COMPILE_CONFIGS

from utils.io_utils import paths_from_dir
from utils.io_utils import save_model

# Module-level NumPy random generator seeded from the dataset-preparation
# config; `get_sample_paths` uses it to shuffle the (image, mask) path pairs.
RNG = np.random.RandomState(ds_prepare_config.RANDOM_STATE)

## Functions

In [None]:
def get_image_shapes(dir: Path):
    """Return the array shape of the first entry found in ``dir``.

    Args:
        dir: Directory to probe. Only the first path yielded by
            ``dir.iterdir()`` is read, so the result describes that one file.

    Returns:
        The ``.shape`` of the array produced by ``imread`` for that path.

    Raises:
        FileNotFoundError: If ``dir`` contains no entries.
    """
    first_path = next(dir.iterdir(), None)
    if first_path is None:
        # A bare ``next()`` would leak StopIteration here, which is easy to
        # misread and can silently terminate an enclosing generator.
        raise FileNotFoundError(f"No files found in directory: {dir}")
    return imread(first_path).shape


def get_sample_paths(images_folder: Path, masks_folder: Path, shuffle: bool):
    """Collect matching image and mask paths as two parallel lists of strings.

    Paths from each folder are sorted and then paired by position, so the
    i-th image is assumed to belong to the i-th mask.

    Args:
        images_folder: Folder passed to ``paths_from_dir`` for the images.
        masks_folder: Folder passed to ``paths_from_dir`` for the masks.
        shuffle: If true, shuffle the (image, mask) pairs together using the
            module-level ``RNG`` so the pairing is preserved.

    Returns:
        A pair ``(image_paths, mask_paths)`` of equally long lists of ``str``.

    Raises:
        ValueError: If the two folders yield a different number of paths.
    """
    image_paths = [str(path) for path in sorted(paths_from_dir(images_folder))]
    mask_paths = [str(path) for path in sorted(paths_from_dir(masks_folder))]

    # Validate the pairing regardless of `shuffle`; previously a mismatch was
    # only detected (via zip(strict=True)) when shuffling was requested.
    if len(image_paths) != len(mask_paths):
        raise ValueError(
            f"Number of images ({len(image_paths)}) does not match "
            f"number of masks ({len(mask_paths)})"
        )

    # Skip shuffling for empty input: zip(*[]) yields nothing to unpack.
    if shuffle and image_paths:
        paths = list(zip(image_paths, mask_paths))
        RNG.shuffle(paths)  # type: ignore
        image_paths, mask_paths = (list(el) for el in zip(*paths))
    return image_paths, mask_paths


def load_image_mask(image_path, mask_path):
    """Read one image/mask pair from disk and prepare both as tensors.

    The image is decoded as a 3-channel JPEG, resized to the first two
    entries of ``model_config.TARGET_SHAPE`` and divided by 255.0.
    The mask is decoded with 3 channels, collapsed to grayscale, resized with
    nearest-neighbour sampling, converted to uint8 and divided by 255.

    Args:
        image_path: Path of the image file.
        mask_path: Path of the mask file.

    Returns:
        The ``(image, mask)`` tensor pair.
    """
    target_size = model_config.TARGET_SHAPE[0:2]

    # --- image ---
    raw_image = tf.io.read_file(image_path)
    decoded_image = tf.io.decode_jpeg(raw_image, channels=3)  # type: ignore
    image = tf.image.resize(decoded_image, target_size) / 255.0  # type: ignore

    # --- mask ---
    raw_mask = tf.io.read_file(mask_path)
    rgb_mask = tf.io.decode_image(raw_mask, channels=3, expand_animations=False)
    gray_mask = tf.image.rgb_to_grayscale(rgb_mask)
    # Nearest-neighbour keeps the mask free of interpolated in-between values.
    resized_mask = tf.image.resize(gray_mask, target_size, method="nearest")
    uint8_mask = tf.image.convert_image_dtype(resized_mask, dtype=tf.dtypes.uint8)
    mask = uint8_mask / 255  # type: ignore

    return image, mask


def get_dataset(
    images_foler: Path, masks_folder: Path, prepare_shuffle=True, training_shuffle=True
):
    """Build a batched, prefetched ``tf.data`` pipeline of (image, mask) pairs.

    Args:
        images_foler: Folder with the input images (parameter name kept as-is
            for backward compatibility with keyword callers).
        masks_folder: Folder with the corresponding masks.
        prepare_shuffle: Forwarded to ``get_sample_paths``; shuffles the list
            of path pairs once, before the dataset is created.
        training_shuffle: If true, add a ``Dataset.shuffle`` stage with the
            buffer size and seed from ``ds_prepare_config``. Pass ``False``
            (e.g. for validation data) to keep the element order fixed.

    Returns:
        A ``tf.data.Dataset`` yielding batches produced by ``load_image_mask``.
    """
    image_paths, mask_paths = get_sample_paths(
        images_foler, masks_folder, prepare_shuffle
    )
    paths_ds = tf.data.Dataset.from_tensor_slices((image_paths, mask_paths))
    ds = paths_ds.map(load_image_mask, num_parallel_calls=tf.data.AUTOTUNE)

    # Honour `training_shuffle`: previously the shuffle stage was always
    # applied and the flag had no effect.
    if training_shuffle:
        ds = ds.shuffle(
            ds_prepare_config.DS_SHUFFLE_BUFF_SIZE, ds_prepare_config.RANDOM_STATE
        )

    ds = ds.batch(
        ds_prepare_config.BATCH_SIZE, num_parallel_calls=tf.data.AUTOTUNE
    ).prefetch(tf.data.AUTOTUNE)
    return ds

In [None]:
# images, masks = get_sample_paths(
#         io_config.TRAIN_IMAGES_DIR, io_config.TRAIN_MASKS_DIR, True
#     )
# print(*(list(zip(images, masks))[:10]),sep='\n')

In [None]:
# ds = get_dataset(io_config.TRAIN_IMAGES_DIR, io_config.TRAIN_MASKS_DIR)
# image, mask = next(ds.take(1).as_numpy_iterator())
# from skimage.io import imshow, show
# imshow(image)
# show()
# imshow(mask)
# show()

## Train model function

In [None]:
def train_model(model_name):
    """Load a model architecture from JSON, train it and save the result.

    Reads ``<model_name>_architecture.json`` from ``io_config.MODEL_SAVE_DIR``,
    builds the training and validation datasets, compiles the model with an
    Adam optimizer plus the settings from ``COMPILE_CONFIGS``, fits it with a
    TensorBoard callback and finally writes ``<model_name>.keras`` to
    ``io_config.MODEL_SAVE_DIR``.

    Args:
        model_name: Base name used for both the architecture file that is read
            and the trained-model file that is written.

    Returns:
        None. The trained model is saved to disk only after ``fit`` returns,
        so an interrupted run leaves no saved model.
    """
    with open(io_config.MODEL_SAVE_DIR / f"{model_name}_architecture.json") as f:
        json_model = f.read()
    model: keras.models.Model = model_from_json(json_model)
    # image_shape = get_images_shapes(io_config.TRAIN_IMAGES_DIR)

    train_ds = get_dataset(io_config.TRAIN_IMAGES_DIR, io_config.TRAIN_MASKS_DIR)
    val_ds = get_dataset(
        io_config.VAL_IMAGES_DIR, io_config.VAL_MASKS_DIR, training_shuffle=False
    )

    optimizer = keras.optimizers.Adam(
        learning_rate=training_config.LEARNING_RATE,
        beta_1=0.9,
        beta_2=0.999,
        amsgrad=False,
    )  # ,decay=1e-6)

    # Work on a shallow copy: COMPILE_CONFIGS is shared module-level state and
    # must not accumulate the optimizer / run_eagerly entries added here.
    comp_config = dict(COMPILE_CONFIGS[model_config.OUT_SIZE])
    comp_config["optimizer"] = optimizer
    comp_config["run_eagerly"] = training_config.DEBUG_MODEL
    model.compile(**comp_config)

    # checkpointer = ModelCheckpoint(
    #     filepath=io_config.CHECKPOINTS_SAVE_DIR / f"{model_name}_{{epoch}}.keras",
    #     monitor="val_accuracy",
    #     verbose=1,
    #     save_best_only=True,
    #     save_weights_only=False,
    # )
    tboard = TensorBoard(io_config.TENSORBOARD_LOG_DIR)  # type: ignore
    model.fit(
        train_ds,
        epochs=training_config.EPOCHS,
        callbacks=[tboard],
        validation_data=val_ds,
        # Ordering is controlled by the tf.data pipelines built above.
        shuffle=False,
    )
    model.save(io_config.MODEL_SAVE_DIR / f"{model_name}.keras")

## **Entry point**

In [None]:
# Name shared by the architecture file read in `train_model` and the trained
# model file it writes.
model_name = "unet0 batch_norm"
# NOTE(review): presumably writes "<model_name>_architecture.json", which
# `train_model` reads back — confirm against utils.io_utils.save_model.
save_model(model_name)
train_model(model_name)

Epoch 1/32
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m647s[0m 5s/step - accuracy: 0.9147 - loss: 0.3118 - val_accuracy: 0.7876 - val_loss: 0.5639
Epoch 2/32
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m275s[0m 3s/step - accuracy: 0.9878 - loss: 0.1166 - val_accuracy: 0.7876 - val_loss: 0.6709
Epoch 3/32
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m309s[0m 3s/step - accuracy: 0.9905 - loss: 0.0965 - val_accuracy: 0.7876 - val_loss: 0.7576
Epoch 4/32
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m307s[0m 3s/step - accuracy: 0.9919 - loss: 0.0859 - val_accuracy: 0.8802 - val_loss: 0.4252
Epoch 5/32
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m268s[0m 2s/step - accuracy: 0.9928 - loss: 0.0787 - val_accuracy: 0.9704 - val_loss: 0.1341
Epoch 6/32
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m324s[0m 3s/step - accuracy: 0.9933 - loss: 0.0733 - val_accuracy: 0.9894 - val_loss: 0.0785
Epoch 7/32
[1m102/102

KeyboardInterrupt: 

In [None]:
# Load the TensorBoard notebook extension and open it on the training logs.
# NOTE(review): the log directory is hard-coded here, while training writes to
# io_config.TENSORBOARD_LOG_DIR — confirm both point at the same folder.
%load_ext tensorboard
%tensorboard --logdir data/logs

Точность за 8 эпох - 0,9942/0,9928

**TODO**


1. Закоммитить изменения в новую ветку colab_training
2. Изменить метрику оценки на MeanIoU или Dice
3. Посмотреть логи TensorBoard
4. Оценить лучшую модель
5. Переделать набор для определения контуров
6. Добавить оповещение об окончании обучения

