In [1]:
import os
import cv2
import numpy as np
import seaborn as sns
from skimage import io
from datetime import datetime
import matplotlib.pyplot as plt
%matplotlib inline

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import load_img

from IPython.display import HTML
from IPython.display import clear_output
import warnings
warnings.simplefilter("ignore")

# Скачивание данных с помощью Kaggle API

https://www.kaggle.com/docs/api

https://www.kaggle.com/general/74235

In [2]:
# !pip install -q kaggle

# from google.colab import files

# files.upload()

## Coco Dataset

Первым набором данных является датасет [COCO](https://cocodataset.org/#download)

COCO - это крупномасштабный набор данных для обнаружения объектов, сегментации и субтитров. COCO имеет несколько особенностей:

* Сегментация объектов
* Признание в контексте
* Сегментация суперпиксельного материала
* 330 тысяч изображений (>200K помеченных) миллиона экземпляров объектов
* 80 категорий объектов
* 91 категория товаров
* 5 подписей к изображению
* 250 000 человек с ключевыми точками


Формат для набора данных обнаружения объектов COCO задокументирован в формате [COCO Data Format](https://cocodataset.org/#format-data). Данные COCO состоят из пяти разделов информации, которые предоставляют информацию для всего набора данных.

1. Информация – общая информация о наборе данных.

2. Лицензии – информация о лицензии для изображений в наборе данных.

3. Изображения – список изображений в наборе данных.

4. Аннотации – список аннотаций (включая ограничивающие рамки), которые присутствуют на всех изображениях в наборе данных.

5. Категории - список категорий этикеток.

COCO хранит аннотации в формате JSON.

В официальном документе COCO говорится, что он имеет пять функций: обнаружение объектов, обнаружение ключевых точек, сегментация материала, паноптическая сегментация и субтитры к изображениям.

В данном ноутбуке будет использована разновидность COCO датасета с kaggle: [COCO-Person-Segmentation](https://www.kaggle.com/datasets/oishee30/cocopersonsegmentation) со следующей структурой

    cocopersonsegmentation
        ├── train2017_ann
        ├── train2017_new
        ├── val2017_ann
        └── val2017_new

В данной модификации уже произведено разбиение изображений на тренировочную и валидационные выборки, а также аннотации (в данном случае маски) к ним

    

In [3]:
# ! mkdir ~/.kaggle                 #make directory(folder) named .kaggle
 
# ! cp kaggle.json ~/.kaggle/       #add file to that folder
 
# ! chmod 600 ~/.kaggle/kaggle.json        #Change the permissions of the file.

In [4]:

# ! kaggle datasets download oishee30/cocopersonsegmentation

# print(os.listdir('/content'))

In [5]:
# ! mkdir data        #making directory data
 
# ! unzip cocopersonsegmentation.zip -d data            #unzipping data into data directory

In [6]:
# os.remove('/content/cocopersonsegmentation.zip')
# print(os.listdir('/content'))

In [7]:
os.listdir('/kaggle/')

In [8]:
# path = '/content/data'
# for folder in os.listdir(path):
#   if '.txt' not in folder:
#     print('No of images in',folder,len(os.listdir(path+'/'+folder)))

In [9]:
xpath_coco = '../input/cocopersonsegmentation/train2017_new'
ypath_coco = '../input/cocopersonsegmentation/train2017_ann'
x_test_path = '../input/cocopersonsegmentation/val2017_new'
y_test_path = '../input/cocopersonsegmentation/val2017_ann'

In [10]:
input_img_paths_coco = sorted([os.path.join(xpath_coco, fname) for fname in os.listdir(xpath_coco)])
target_img_paths_coco = sorted([os.path.join(ypath_coco, fname) for fname in os.listdir(ypath_coco)])

print(len(input_img_paths_coco), len(target_img_paths_coco))
for input_path, target_path in zip(input_img_paths_coco[:4], target_img_paths_coco[:4]):
    print(input_path, "|", target_path)

## Другой датасет

Второй датасет [Person segmentation dataset](https://www.kaggle.com/datasets/furkankati/person-segmentation-dataset) предназначен для сегментации изображений людей. Включает только фотографии людей с разных ракурсов.

Набор данных составлен из https://vuhcs.github.io/ и имеет следующую структуру:

    person-segmentation-dataset
    └── Training
        ├── Output
        └── input


В папке *input* есть цветные изображения людей. В папке *Output* есть изображения с аннотациями с тем же именем и размером. Аннотации - это изображения серого цвета. Значения пикселей аннотации равны 255. 255 для лица и 0 для фона.

In [11]:
# ! kaggle datasets download furkankati/person-segmentation-dataset

# print(os.listdir('/content'))

In [12]:
# ! unzip person-segmentation-dataset.zip -d data

In [13]:
# os.remove('/content/person-segmentation-dataset.zip')
# print(os.listdir('/content'))

In [14]:
# os.listdir('/content/data/')

In [15]:
path = '../input/person-segmentation-dataset/Training/'
for folder in os.listdir(path):
    if ('.txt' not in folder) and ('.hdf' not in folder) :
        print('No of images in',folder,len(os.listdir(path+folder)))

In [16]:
xpath_89k = '../input/person-segmentation-dataset/Training/input/'
ypath_89k = '../input/person-segmentation-dataset/Training/Output/'
 
# input_img_paths_89k = sorted([os.path.join(xpath_89k, fname) for fname in os.listdir(xpath_89k)])
# target_img_paths_89k = sorted([os.path.join(ypath_89k, fname) for fname in os.listdir(ypath_89k)])

In [17]:
input_img_paths_89k = sorted([os.path.join(xpath_89k, fname) for fname in os.listdir(xpath_89k)])
target_img_paths_89k = sorted([os.path.join(ypath_89k, fname) for fname in os.listdir(ypath_89k)])

print(len(input_img_paths_89k), len(target_img_paths_89k))
for input_path, target_path in zip(input_img_paths_89k[:4], target_img_paths_89k[:4]):
    print(input_path, "|", target_path)

# Подготовка данных к обучению



In [18]:
train_input_img_paths = input_img_paths_coco.copy()
target_input_img_paths = target_img_paths_coco.copy()

print(len(train_input_img_paths), len(target_input_img_paths))

In [19]:
for img in input_img_paths_89k:
    train_input_img_paths.append(img)

    
for img in target_img_paths_89k:
    target_input_img_paths.append(img)

print(len(train_input_img_paths), len(target_input_img_paths))

In [20]:
train_input_img_paths = sorted(train_input_img_paths)    #sorting
train_target_img_paths = sorted(target_input_img_paths)   #sorting
 
val_input_img_paths = sorted([os.path.join(x_test_path, fname) for fname in os.listdir(x_test_path)])
val_target_img_paths = sorted([os.path.join(y_test_path, fname) for fname in os.listdir(y_test_path)])
 
print("Number of training samples:", len(train_input_img_paths))
print("Number of validation samples:", len(val_input_img_paths))
 
for input_path, target_path in zip(train_input_img_paths[:4], train_target_img_paths[:4]):
    print(input_path, "|", target_path)

In [21]:
n_images = 5
for i in np.random.randint(0,len(train_input_img_paths),n_images):
 
    fig = plt.figure(figsize=(12,6))
    fig.tight_layout()
    plt.subplot(1,2,1)
    img = plt.imread(train_input_img_paths[i])
    plt.imshow(img)
    plt.title('Image')
 
    plt.subplot(1,2,2)
    img = plt.imread(train_target_img_paths[i])
    plt.imshow(img)
    plt.title('Mask')
 
    plt.show()
    print()

In [22]:
mask = plt.imread(train_target_img_paths[0])
print(np.unique(mask, return_counts= True))
print(mask.shape)
sns.countplot(mask.ravel())
plt.show()

In [23]:
mask = plt.imread(train_target_img_paths[-1])
print(np.unique(mask, return_counts= True))
print(mask.shape)
sns.countplot(mask.ravel())
plt.show()

In [24]:
batch_size = 16
img_size = (256,256)

class Data_Gen(keras.utils.Sequence):
    """Helper function to iterate over the data (as Numpy arrays)."""
 
    def __init__(self, batch_size, img_size, input_img_paths, target_img_paths):
        self.batch_size = batch_size
        self.img_size = img_size
        self.input_img_paths = input_img_paths
        self.target_img_paths = target_img_paths
 
    def __len__(self):
        return len(self.target_img_paths) // self.batch_size                    # 64115//32
 
    def __getitem__(self, idx):
        """Returns tuple (input, target) correspond to batch #idx."""
        i = idx * self.batch_size                                                            # 0
        batch_input_img_paths = self.input_img_paths[i : i + self.batch_size]                # [0: 0+32]
        batch_target_img_paths = self.target_img_paths[i : i + self.batch_size]
 
        x = np.zeros((self.batch_size,) + self.img_size + (3,), dtype="float32")           #(32,256,256,3)
        for j, path in enumerate(batch_input_img_paths):
            img = load_img(path, target_size=self.img_size)
            img = np.array(img)/255
            x[j] = img
 
        y = np.zeros((self.batch_size,) + self.img_size + (1,), dtype="uint8")             #(32,256,256,1)
        for j, path in enumerate(batch_target_img_paths):
            img = load_img(path, target_size=self.img_size, color_mode="grayscale")        #(256,256)
            img = np.array(img)
            img[img!=0] = 1
            y[j] = np.expand_dims(img, 2)                                                  #(256,256,1)
 
        return x, y

In [25]:
# checking gererator function
train_gen = Data_Gen(batch_size, img_size, train_input_img_paths, train_target_img_paths)
val_gen = Data_Gen(batch_size, img_size, val_input_img_paths, val_target_img_paths)
x, y = train_gen.__getitem__(0)
print(x.shape, y.shape)

In [26]:
print(y[0].shape)
print(np.unique(y[0], return_counts= True))

# Построение модели

In [27]:
def downblock(filters, filter_size, previous_layer):
    x = layers.Conv2D(filters, filter_size, padding="same")(previous_layer)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
 
    x = layers.Conv2D(filters, filter_size, padding="same")(x)
    x = layers.BatchNormalization()(x)
  
    residual = layers.Conv2D(filters, 1, padding="same")(previous_layer)      #separate layer for addintion
    x = layers.add([x, residual])  # Add back residual
 
    x = layers.Activation("relu")(x)
    p = layers.MaxPooling2D(2)(x)
 
    return x,p
 
def bottleneck(filters, filter_size, previous_layer):
    x = layers.Conv2D(filters, filter_size, padding="same")(previous_layer)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    x = layers.Dropout(.5)(x)
    x = layers.Conv2D(filters, filter_size, padding="same")(x)
 
    residual = layers.Conv2D(filters, 1, padding="same")(previous_layer)      #separate layer for addintion
    x = layers.add([x, residual])  # Add back residual
  
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
 
    return x
 
def upblock(filters, filter_size, previous_layer, layer_to_concat):
    x = layers.Conv2DTranspose(filters, filter_size, strides=2, padding="same")(previous_layer)       #upconvolution
    concat = layers.concatenate([x, layer_to_concat])                                                      #concatenation
 
    x = layers.Conv2D(filters, filter_size, padding="same")(concat)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    x = layers.Conv2D(filters, filter_size, padding="same")(x)
    x = layers.BatchNormalization()(x)
 
    residual = layers.Conv2D(filters, 1, padding="same")(concat)      #separate layer for addintion
    x = layers.add([x, residual])  # Add back residual
  
    x = layers.Activation("relu")(x)
 
    return x

In [28]:
input_layer = layers.Input(shape = img_size + (3,))
 
conv1, pool1 = downblock(32, 3, input_layer)
conv2, pool2 = downblock(64, 3, pool1)
conv3, pool3 = downblock(128, 3, pool2)
conv4, pool4 = downblock(256, 3, pool3)
 
conv5 = bottleneck(512,3,pool4)

upconv1 = upblock(256, 3, conv5, conv4) 
upconv2 = upblock(128, 3, upconv1, conv3)
upconv3 = upblock(64, 3, upconv2, conv2)
upconv4 = upblock(32, 3, upconv3, conv1)
 
output_layer = layers.Conv2D(1, 1, padding="same", activation='sigmoid')(upconv4)
model = keras.Model(input_layer, output_layer)
model.summary()

In [29]:
img_file = "./combined.png"
tf.keras.utils.plot_model(model, to_file= img_file, show_shapes=True, show_layer_names=True)

In [30]:
from tensorflow.keras.optimizers import Adam
opt = Adam(learning_rate=0.001)
 
model.compile(optimizer=opt, loss="binary_crossentropy", 
              metrics=['accuracy', tf.keras.metrics.MeanIoU(num_classes=2)]) # metrics=[tf.keras.metrics.MeanIoU(num_classes=2)]
 
filepath = "./model_epoch_{epoch:00d}_val_loss_{val_loss:03f}.h5"
checkpoint = keras.callbacks.ModelCheckpoint(filepath, save_best_only= False)
 
callbacks = [checkpoint]

In [31]:
start = datetime.now()
 
# Train the model, doing validation at the end of each epoch.
epochs = 5
model_history = model.fit(train_gen, epochs=epochs, validation_data=val_gen, callbacks=callbacks)
 
end = datetime.now()
print(f'Time take to train {epochs} epochs is:', end - start)

    Epoch 1/5
    9621/9621 [==============================] - 2631s 272ms/step - loss: 0.1776 - accuracy: 0.9269 - mean_io_u: 0.4290 - val_loss: 0.2519 - val_accuracy: 0.9087 - val_mean_io_u: 0.4162
    Epoch 2/5
    9621/9621 [==============================] - 2753s 286ms/step - loss: 0.1169 - accuracy: 0.9528 - mean_io_u: 0.4290 - val_loss: 0.2212 - val_accuracy: 0.9145 - val_mean_io_u: 0.4162
    Epoch 3/5
    9621/9621 [==============================] - 2748s 286ms/step - loss: 0.1022 - accuracy: 0.9591 - mean_io_u: 0.4290 - val_loss: 0.2340 - val_accuracy: 0.9169 - val_mean_io_u: 0.4162
    Epoch 4/5
    9621/9621 [==============================] - 2737s 284ms/step - loss: 0.0929 - accuracy: 0.9630 - mean_io_u: 0.4290 - val_loss: 0.2172 - val_accuracy: 0.9222 - val_mean_io_u: 0.4162
    Epoch 5/5
    9621/9621 [==============================] - 2847s 296ms/step - loss: 0.0860 - accuracy: 0.9659 - mean_io_u: 0.4290 - val_loss: 0.2364 - val_accuracy: 0.9228 - val_mean_io_u: 0.4162
    
**Time take to train 5 epochs is: 3:48:37.163636**

In [32]:
model_history.history 

In [34]:
history = model_history.history 

In [35]:
train_loss = history['loss']
val_loss = history['val_loss']
train_acc = history['accuracy']
val_acc = history['val_accuracy']
train_iou = history['mean_io_u']
val_iou = history['val_mean_io_u']

plt.figure(figsize=(14,6))

plt.subplot(1,3,1)
plt.plot(train_loss, 'r', label='Training loss')
plt.plot(val_loss, 'b', label='Validation loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss Value')
plt.yticks(np.arange(0, .5,.05))
plt.legend()

plt.subplot(1,3,2)
plt.plot(train_acc, 'r', label='Training acc')
plt.plot(val_acc, 'b', label='Validation acc')
plt.title('Training and Validation acc')
plt.xlabel('Epoch')
plt.ylabel('acc')
plt.yticks(np.arange(0,1.1,.1))
plt.legend()

plt.subplot(1,3,3)
plt.plot(train_iou, 'r', label='Training mean_io_u')
plt.plot(val_iou, 'b', label='Validation mean_io_u')
plt.title('Training and Validation mean_io_u')
plt.xlabel('Epoch')
plt.ylabel('mean_io_u')
plt.yticks(np.arange(0,1,.05))
plt.legend()
plt.show()

# Тестирование

In [36]:
from PIL import Image

In [37]:
model = keras.models.load_model(f"./model_epoch_{epochs}_val_loss_{str(history['val_loss'][-1])[:8]}.h5")

In [38]:
def ploting(imgpath, maskpath):
    plt.figure(figsize=(18,6))

    im = io.imread(imgpath)
    im = cv2.resize(im,img_size)
    im = np.array(im)/255

    plt.subplot(1,3,1)
    plt.title('Original')
    plt.imshow(im)

    im = im.reshape((1,)+im.shape)
    im.shape

    pred = model.predict(im)

    p = pred.copy()
    p = p.reshape(p.shape[1:-1])

    p[np.where(p>.2)] = 1
    p[np.where(p<.2)] = 0

    im = io.imread(imgpath)
    im = cv2.resize(im,img_size)
    im = np.array(im)

    im[:,:,0] = im[:,:,0]*p 
    im[:,:,0][np.where(p!=1)] = 247
    im[:,:,1] = im[:,:,1]*p 
    im[:,:,1][np.where(p!=1)] = 231
    im[:,:,2] = im[:,:,2]*p
    im[:,:,2][np.where(p!=1)] = 230

    plt.subplot(1,3,2)
    plt.imshow(im)

    if maskpath:
        plt.subplot(1,3,3)
        mask = io.imread(maskpath)
        plt.imshow(mask)

        plt.show()

Результаты на валидационной выборке

In [39]:
n_images = 5
for i in np.random.randint(0,len(val_input_img_paths),n_images):
    ploting(val_input_img_paths[i], val_target_img_paths[i])

## Красивый вывод результатов тестирования

In [40]:
def plot_and_save(imgpath):
    plt.figure(figsize=(18,6))

    im = io.imread(imgpath)
    im_orig_size = im.shape[:-1]
    im = cv2.resize(im,img_size)
    im = np.array(im)/255

    plt.subplot(1,3,1)
    plt.title('Original')
    plt.imshow(im)

    im = im.reshape((1,)+im.shape)
    im.shape

    pred = model.predict(im)

    p = pred.copy()
    p = p.reshape(p.shape[1:-1])

    p[np.where(p>.2)] = 1
    p[np.where(p<.2)] = 0

    im = io.imread(imgpath)
    im = cv2.resize(im,img_size)
    im = np.array(im)

    im[:,:,0] = im[:,:,0]*p 
    im[:,:,0][np.where(p!=1)] = 247
    im[:,:,1] = im[:,:,1]*p 
    im[:,:,1][np.where(p!=1)] = 231
    im[:,:,2] = im[:,:,2]*p
    im[:,:,2][np.where(p!=1)] = 230

    plt.subplot(1,3,2)
    # im = cv2.resize(im,im_orig_size[::-1])
    plt.imshow(im)

    im = cv2.resize(im,im_orig_size[::-1])
    im = Image.fromarray(im)
    im.save(f"without_bg_{imgpath[2:]}")

In [41]:
!wget https://m.media-amazon.com/images/M/MV5BNDMzNWE3N2QtY2Q5MS00N2M2LTk0YjctMWEzNWYyYWI0YTgxXkEyXkFqcGdeQXVyMjMzMDI4MjQ@._V1_.jpg

![](./MV5BNDMzNWE3N2QtY2Q5MS00N2M2LTk0YjctMWEzNWYyYWI0YTgxXkEyXkFqcGdeQXVyMjMzMDI4MjQ@._V1_.jpg)

In [42]:
plot_and_save("./MV5BNDMzNWE3N2QtY2Q5MS00N2M2LTk0YjctMWEzNWYyYWI0YTgxXkEyXkFqcGdeQXVyMjMzMDI4MjQ@._V1_.jpg")

![](./without_bg_MV5BNDMzNWE3N2QtY2Q5MS00N2M2LTk0YjctMWEzNWYyYWI0YTgxXkEyXkFqcGdeQXVyMjMzMDI4MjQ@._V1_.jpg)

In [43]:
!rm -r pic.png
!rm -r ./w_bg.png

im = Image.open('./without_bg_MV5BNDMzNWE3N2QtY2Q5MS00N2M2LTk0YjctMWEzNWYyYWI0YTgxXkEyXkFqcGdeQXVyMjMzMDI4MjQ@._V1_.jpg')
im.save('pic.png')

def magic(img_path, orig_path):
    img = Image.open(img_path)
    orig = Image.open(orig_path)
    
    img = img.convert("RGBA")
    orig = orig.convert("RGBA")
    datas = img.getdata()
    orig_data = orig.getdata()

    newData = []
    colors = [247, 231, 230]
    for item, item_orig in zip(datas, orig_data):
        if item[0] in colors or item[1] in colors or item[2] in colors:
            newData.append((255, 255, 255, 0))
        else:
            newData.append(item_orig)

    img.putdata(newData)
    img.save(f"w_bg.png", "PNG")
    
magic('pic.png', "./MV5BNDMzNWE3N2QtY2Q5MS00N2M2LTk0YjctMWEzNWYyYWI0YTgxXkEyXkFqcGdeQXVyMjMzMDI4MjQ@._V1_.jpg")

![](./w_bg.png)

# Источники:

* [COCO-Person-Segmentation](https://www.kaggle.com/datasets/oishee30/cocopersonsegmentation)
* [Person segmentation dataset](https://www.kaggle.com/datasets/furkankati/person-segmentation-dataset)
* [Automatic_Background_Removal](https://github.com/G0rav/Automatic_Background_Removal)
* [COCO датасет](https://cocodataset.org/#home)
* [Background removal with deep learning](https://towardsdatascience.com/background-removal-with-deep-learning-c4f2104b3157)
