### Подключение инфраструктуры

In [0]:
# Mount Google Drive into the Colab filesystem at /content/drive

from google.colab import drive
drive.mount('/content/drive')

In [0]:
# Project root on the mounted Google Drive; the data folder and the checkpoint
# folder used later in the notebook are resolved relative to it.
PATH = '/content/drive/My Drive/GU_NN/'

In [0]:
import sys
import os
# Add the project folder to the module search path.
sys.path.append(PATH)

### Задание

Обучить модель семантической сегментации (человек-vs-фон) на подмножестве датасета MS COCO.
Библиотеки: [Python, TensorFlow]

### Библиотеки

In [0]:
import skimage.io as io
import numpy as np

import tensorflow as tf

In [0]:
# Show the TensorFlow version this notebook runs with.
print(tf.__version__)

2.2.0


### Константы

In [0]:
# Seed value passed to the TensorFlow and NumPy seeding calls.
RANDOM_STATE = 1

In [0]:
# Seed the global TensorFlow and NumPy random generators with RANDOM_STATE.
tf.random.set_seed(RANDOM_STATE)
np.random.seed(RANDOM_STATE)
# NOTE(review): TF1-compat seeding API; presumably redundant with
# tf.random.set_seed under TF 2.x — TODO confirm.
tf.compat.v1.random.set_random_seed(RANDOM_STATE)

### Загрузка датасета COCO и COCO API

In [0]:
# One-off dataset setup, disabled by `if 0:`. The shell commands below
# (download and unpack the COCO 2017 train/val images and annotations, clone
# and build the COCO API) are commented out, so enabling the branch as written
# would only print 'End'.
if 0:
    #!cd 'drive/My Drive/GU_NN/data'  && wget http://images.cocodataset.org/zips/train2017.zip 
    #!cd 'drive/My Drive/GU_NN/data'  && wget http://images.cocodataset.org/zips/val2017.zip 
    #!cd 'drive/My Drive/GU_NN/data'  && wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip 

    #!cd 'drive/My Drive/GU_NN/data'  && unzip -q train2017.zip
    #!cd 'drive/My Drive/GU_NN/data'  && unzip -q val2017.zip
    #!cd 'drive/My Drive/GU_NN/data'  && unzip -q annotations_trainval2017.zip

    #!cd 'drive/My Drive/GU_NN/data' && git clone https://github.com/cocodataset/cocoapi
    #!cd 'drive/My Drive/GU_NN/data/cocoapi/PythonAPI' && make
    print('End')

In [0]:
# Root folder of the COCO data: images, annotations and the cocoapi checkout
# are all looked up below this directory.
COCO_ROOT = os.path.join(PATH,'data')
import sys
# Put the COCO Python API checkout first on the import path so that
# `pycocotools` is importable.
sys.path.insert(0, os.path.join(COCO_ROOT, 'cocoapi/PythonAPI'))
from pycocotools.coco import COCO

### Универсальный класс Dataset для сегментации

In [0]:
class Dataset():
    """Generic tf.data pipeline builder for binary semantic segmentation.

    Subclasses are expected to provide:

    * ``self.img_list`` -- a sequence of item identifiers, and
    * ``self.read_images(item)`` -- a function run through ``tf.py_function``
      that returns a uint8 array with three dimensions whose first three
      channels are the image and whose fourth channel is the mask.
    """

    def crop_images(self, img, inp_size, random_crop=False):
        """Pad and crop a combined image+mask tensor to ``inp_size`` x ``inp_size``.

        Args:
            img: 3-D tensor (height, width, channels) holding image and mask.
            inp_size: side length of the square output.
            random_crop: if True take a random crop, otherwise a central crop.

        Returns:
            Tensor with spatial size ``inp_size`` x ``inp_size`` and at most
            4 channels.
        """
        shape = tf.shape(img)
        # Zero-pad at the bottom/right so both spatial sides are >= inp_size.
        pad = (
            [0, tf.maximum(inp_size - shape[0], 0)],
            [0, tf.maximum(inp_size - shape[1], 0)],
            [0, 0],
        )
        img = tf.pad(img, pad)

        if random_crop:
            # The channel count is a literal 4 (image channels + mask).
            return tf.image.random_crop(img, (inp_size, inp_size, 4))

        # Central crop: offsets are computed from the padded shape.
        shape = tf.shape(img)
        ho = (shape[0] - inp_size) // 2
        wo = (shape[1] - inp_size) // 2
        return img[ho:ho + inp_size, wo:wo + inp_size, :4]

    def _item_to_images(self, item, inp_size, random_crop):
        """Load one item, crop it and split it into (image, mask) float tensors.

        The image is scaled to [0, 1] by dividing by 255; the mask is only
        cast to float32.
        """
        img_combined = tf.py_function(self.read_images, [item], tf.uint8)
        img_combined = self.crop_images(img_combined, inp_size, random_crop)
        img = tf.cast(img_combined[..., :3], tf.float32) / np.float32(255.)
        mask_class = tf.cast(img_combined[..., 3:4], tf.float32)
        return img, mask_class

    def train_dataset(self, batch_size, epochs, inp_size):
        """Build the training pipeline: shuffle, load with random crops, repeat, batch.

        Args:
            batch_size: number of samples per batch (partial batches are dropped).
            epochs: how many times the item list is repeated.
            inp_size: side length of the square crops.

        Returns:
            ``tf.data.Dataset`` yielding ``(image, mask)`` batches.
        """

        def item_to_images(item):
            return self._item_to_images(item, inp_size, random_crop=True)

        dataset = tf.data.Dataset.from_tensor_slices(self.img_list)
        dataset = dataset.shuffle(buffer_size=len(self.img_list))
        dataset = dataset.map(item_to_images)
        dataset = dataset.repeat(epochs)
        dataset = dataset.batch(batch_size, drop_remainder=True)

        return dataset

    def val_dataset(self, batch_size, inp_size):
        """Build the validation pipeline: load with central crops, batch.

        Args:
            batch_size: number of samples per batch (partial batches are dropped).
            inp_size: side length of the square crops.

        Returns:
            ``tf.data.Dataset`` yielding ``(image, mask)`` batches in list order.
        """

        def item_to_images(item):
            return self._item_to_images(item, inp_size, random_crop=False)

        dataset = tf.data.Dataset.from_tensor_slices(self.img_list)
        dataset = dataset.map(item_to_images)
        dataset = dataset.batch(batch_size, drop_remainder=True)

        return dataset

### Класс для сегментационного датасета COCO  
Класс наследуется от универсального Dataset и реализует кастомную функцию чтения данных.

In [0]:
import csv

In [0]:
class COCO_Dataset(Dataset):
    """Person-vs-background segmentation data read through the COCO API.

    Implements the ``img_list`` / ``read_images`` interface used by
    ``Dataset`` for images that contain the 'person' category.
    """

    def __init__(self, sublist):
        """Load the annotation index for one split.

        Args:
            sublist: split name used to build the annotation file name
                ``instances_<sublist>2017.json`` (the notebook uses 'train'
                and 'val').
        """
        ann_file_fpath = os.path.join(COCO_ROOT, 'annotations', 'instances_'+sublist+'2017.json')
        self.coco = COCO(ann_file_fpath)
        self.sublist = sublist
        self.cat_ids = self.coco.getCatIds(catNms=['person'])
        self.img_list = self.coco.getImgIds(catIds=self.cat_ids)

    def read_images(self, img_id):
        """Read one image and its person mask as a single (H, W, C+1) array.

        Args:
            img_id: tensor holding a COCO image id (called via
                ``tf.py_function``, hence the ``.numpy()`` conversion).

        Returns:
            uint8 array: the image channels followed by one binary mask
            channel (1 where any 'person' annotation covers the pixel).
        """
        img_id = int(img_id.numpy())
        img_data = self.coco.loadImgs(img_id)[0]
        # The last two URL components give the path of the image below COCO_ROOT.
        img_fname = '/'.join(img_data['coco_url'].split('/')[-2:])

        img = io.imread(os.path.join(COCO_ROOT, img_fname))
        if len(img.shape) == 2:
            # 2-D (single channel) image: replicate it into three channels.
            img = np.tile(img[..., None], (1, 1, 3))

        ann_ids = self.coco.getAnnIds(imgIds=img_data['id'], catIds=self.cat_ids, iscrowd=None)
        anns = self.coco.loadAnns(ann_ids)
        mask_class = np.zeros((img.shape[0], img.shape[1]), dtype=np.uint8)
        for ann in anns:
            # Merge with an element-wise maximum instead of `+=`: summing
            # into uint8 could wrap around where many annotations overlap.
            mask_class = np.maximum(mask_class, self.coco.annToMask(ann))
        mask_class = (mask_class > 0).astype(np.uint8)
        img_combined = np.concatenate([img, mask_class[..., None]], axis=2)
        return img_combined

    def build_csvperson(self):
        """Write ``<sublist>person.csv`` (columns: id, file_name) to the working directory."""
        with open(self.sublist+'person.csv', 'w', newline='') as csvfile:
            fieldnames = ['id', 'file_name']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            for img_info in self.coco.loadImgs(self.img_list):
                writer.writerow({'id': img_info['id'], 'file_name': img_info['file_name']})
        print(f'{self.sublist}person.csv is ready')

In [0]:
# Load the COCO annotation indexes for the 'train' and 'val' splits.
COCO_dataset_train = COCO_Dataset('train')
COCO_dataset_val = COCO_Dataset('val')

loading annotations into memory...
Done (t=24.22s)
creating index...
index created!
loading annotations into memory...
Done (t=1.29s)
creating index...
index created!


In [0]:
# Export the id / file name of every selected image to <split>person.csv.
COCO_dataset_val.build_csvperson()
COCO_dataset_train.build_csvperson()

valperson.csv is ready
trainperson.csv is ready


In [0]:
# Samples per batch.
BATCH_SIZE = 32
# Side length (pixels) of the square crops fed to the network.
IMG_INP_SIZE = 256
# Number of training epochs.
EPOCHS = 1 

In [0]:
# Training pipeline (shuffled, random crops, repeated EPOCHS times) and
# validation pipeline (central crops, single pass).
train_ds = COCO_dataset_train.train_dataset(batch_size=BATCH_SIZE, epochs=EPOCHS, inp_size=IMG_INP_SIZE)
val_ds = COCO_dataset_val.val_dataset(batch_size=BATCH_SIZE, inp_size=IMG_INP_SIZE)

### Модель

In [0]:
class ASPPBlock(tf.keras.Model):
    """Four parallel convolutions over the same input, fused by a 1x1 convolution.

    The branches are a 1x1 convolution and three 3x3 convolutions with
    dilation rates 6, 12 and 18; every convolution has 256 filters, 'same'
    padding and ReLU activation.
    """

    def __init__(self):
        super().__init__()
        conv = tf.keras.layers.Conv2D
        common = dict(padding='same', activation='relu')

        # Parallel branches.
        self.conv1 = conv(256, (1, 1), **common)
        self.conv2 = conv(256, (3, 3), dilation_rate=6, **common)
        self.conv3 = conv(256, (3, 3), dilation_rate=12, **common)
        self.conv4 = conv(256, (3, 3), dilation_rate=18, **common)
        # Fusion of the concatenated branch outputs.
        self.conv5 = conv(256, (1, 1), **common)

    def call(self, inp, is_training=False):
        """Apply all branches to ``inp``, concatenate along channels and fuse.

        ``is_training`` is accepted but not used.
        """
        branch_layers = (self.conv1, self.conv2, self.conv3, self.conv4)
        branch_outputs = [layer(inp) for layer in branch_layers]
        stacked = tf.concat(branch_outputs, axis=3)
        return self.conv5(stacked)
    
class ASPPNet(tf.keras.Model):
    """Encoder / ASPP / decoder network producing a one-channel sigmoid mask.

    The encoder is ten 3x3 convolutions with four 2x2 max-pooling steps. The
    output of ``conv6`` (after two poolings) is kept as a skip connection,
    reduced to 48 channels and concatenated with the upsampled ASPP output
    before the decoder convolutions.
    """

    def __init__(self):
        super().__init__()

        def conv_relu(filters, kernel_size):
            # All hidden convolutions share 'same' padding and ReLU activation.
            return tf.keras.layers.Conv2D(filters, kernel_size, padding='same', activation='relu')

        # Encoder.
        self.conv1 = conv_relu(64, (3, 3))
        self.conv2 = conv_relu(64, (3, 3))
        self.conv3 = conv_relu(128, (3, 3))
        self.conv4 = conv_relu(128, (3, 3))
        self.conv5 = conv_relu(256, (3, 3))
        self.conv6 = conv_relu(256, (3, 3))
        self.conv7 = conv_relu(512, (3, 3))
        self.conv8 = conv_relu(512, (3, 3))
        self.conv9 = conv_relu(512, (3, 3))
        self.conv10 = conv_relu(512, (3, 3))

        # Decoder: skip-connection reduction, two refinement convolutions and
        # a linear 1x1 convolution producing the single output channel.
        self.conv11 = conv_relu(48, (1, 1))
        self.conv12 = conv_relu(256, (3, 3))
        self.conv13 = conv_relu(256, (3, 3))
        self.conv14 = tf.keras.layers.Conv2D(1, (1, 1), padding='same', activation=None)

        # One pooling layer instance is reused for every downsampling step.
        self.maxpool = tf.keras.layers.MaxPooling2D((2, 2), (2, 2), padding='same')

        self.aspp = ASPPBlock()

    def call(self, x):
        """Return per-pixel sigmoid outputs with the same spatial size as ``x``."""
        bilinear = tf.image.ResizeMethod.BILINEAR

        # Encoder stages 1-2, each followed by pooling.
        feats = self.maxpool(self.conv2(self.conv1(x)))
        feats = self.maxpool(self.conv4(self.conv3(feats)))

        # Stage 3: its un-pooled output is kept for the decoder.
        skip = self.conv6(self.conv5(feats))
        feats = self.maxpool(skip)

        # Stages 4-5.
        feats = self.maxpool(self.conv8(self.conv7(feats)))
        feats = self.conv10(self.conv9(feats))

        context = self.aspp(feats)

        # Bring the ASPP output back to the skip connection's resolution.
        context = tf.image.resize(context, tf.shape(skip)[1:3], bilinear)

        skip_reduced = self.conv11(skip)
        merged = tf.concat([context, skip_reduced], axis=3)

        logits = self.conv14(self.conv13(self.conv12(merged)))

        # Upsample to the input resolution, then squash to (0, 1).
        logits = tf.image.resize(logits, tf.shape(x)[1:3], bilinear)
        return tf.nn.sigmoid(logits)
    
# Instantiate the segmentation network.
model = ASPPNet()

In [0]:
# Binary cross-entropy on the sigmoid outputs of the network, optimised with Adam.
loss = tf.keras.losses.BinaryCrossentropy()
model.compile(optimizer="adam", loss=loss)

### Обучение

In [0]:
# Checkpoints are written to <PATH>/training; the `{epoch:04d}` placeholder in
# the file name is filled in with the epoch number (via `str.format`).
checkpoint_dir = os.path.join(PATH, "training")
checkpoint_path = checkpoint_dir + "/cp-{epoch:04d}.ckpt"

# Callback that saves the model weights only.
# NOTE(review): an integer `save_freq` is counted in batches, so this saves
# every 5 training batches, not every 5 epochs.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    save_freq=5,
    verbose=0,
)

In [0]:
# Resume from the most recent checkpoint if there is one; otherwise store the
# current weights as checkpoint 0. `latest_checkpoint` returns None when no
# checkpoint is found, so that case is tested explicitly instead of relying on
# a bare `except:` that would also hide genuine load errors.
latest = tf.train.latest_checkpoint(checkpoint_dir)
if latest is not None:
    model.load_weights(latest)
else:
    model.save_weights(checkpoint_path.format(epoch=0))

In [0]:
# Display the dataset's element shapes and dtypes.
train_ds

<BatchDataset shapes: ((32, 256, 256, 3), (32, 256, 256, 1)), types: (tf.float32, tf.float32)>

In [0]:
import matplotlib.pyplot as plt

# Overlay the ground-truth mask on the first five images of one training batch.
if 1:
    batch_images, batch_masks = next(iter(train_ds))
    colormap = plt.get_cmap('viridis')
    for sample_idx in range(5):
        image = batch_images[sample_idx].numpy()
        mask_2d = batch_masks[sample_idx].numpy().reshape(IMG_INP_SIZE, IMG_INP_SIZE)
        # The colormap returns RGBA; keep RGB only so it blends with the image.
        mask_rgb = colormap(mask_2d)[..., :3]
        plt.imshow(image*0.5 + mask_rgb*0.5)
        plt.show()

In [0]:
# Обучаем модель с новым коллбеком
%%time
if 1:
  history = model.fit(train_ds,  
            epochs=EPOCHS,
            steps_per_epoch=len(COCO_dataset_train.img_list)//BATCH_SIZE,
            validation_data=val_ds,
            callbacks=[cp_callback])

### Результат

In [0]:
# Compare ground-truth and predicted masks on the first validation batch.
if 1:
    test_batch = next(iter(val_ds))
    test_images, test_masks = test_batch
    # predict() must receive only the input images (not the (image, mask)
    # tuple) and it returns a NumPy array, so no `.numpy()` call is needed
    # on its result.
    prediction = model.predict(test_images)
    colormap = plt.get_cmap('viridis')
    for idx in range(5):
        sample = test_images[idx].numpy()
        fig = plt.figure(figsize=(16, 6))

        plt.subplot(1, 2, 1)
        plt.title("True segment")
        val_seg_map = colormap(test_masks[idx].numpy().reshape(IMG_INP_SIZE, IMG_INP_SIZE))[..., :3]
        plt.imshow(sample*0.5 + val_seg_map*0.5)

        plt.subplot(1, 2, 2)
        plt.title("Predict segment")
        pred_seg_map = colormap(prediction[idx].reshape(IMG_INP_SIZE, IMG_INP_SIZE))[..., :3]
        plt.imshow(sample*0.5 + pred_seg_map*0.5)
        plt.show()