### Importing libraries

In [1]:
import os
import numpy as np

from PIL import Image

In [2]:
# Directory holding the '<id>.jpg' images referenced by the id lists.
image_dir = 'Pascal-part/JPEGImages'
# Directory holding the '<id>.npy' mask arrays, looked up by the same ids.
mask_dir = 'Pascal-part/gt_masks'

### Data Preprocessing

In [3]:
def dataset_freq(image_dir, mode='train'):
    """Count how many images of each size a dataset split contains.

    Image ids are read, one per line, from ``Pascal-part/<mode>_id.txt``.
    For every id the file ``<image_dir>/<id>.jpg`` is opened and its
    ``Image.size`` tuple (``(width, height)`` in Pillow) is tallied.

    Args:
        image_dir: Directory containing the ``.jpg`` images.
        mode: Split name used to build the id-list file name.

    Returns:
        dict mapping each size tuple to the number of images with that size,
        ordered from the most to the least frequent size. Sizes with equal
        counts keep the order in which they were first seen. An id list with
        no ids yields an empty dict.
    """
    dataset = 'Pascal-part/' + mode + '_id.txt'
    size_count = {}
    with open(dataset, 'r') as file:
        # Iterate the file lazily instead of loading it with readlines().
        for line in file:
            image_id = line.strip()
            if not image_id:
                # A blank line would otherwise resolve to '<image_dir>/.jpg'.
                continue
            image_path = os.path.join(image_dir, image_id + '.jpg')
            with Image.open(image_path) as img:
                size = img.size
            size_count[size] = size_count.get(size, 0) + 1
    # sorted() is stable, so ties keep their first-seen order.
    return dict(sorted(size_count.items(), key=lambda item: item[1], reverse=True))

# Display the image-size histogram for the default 'train' split.
dataset_freq(image_dir)

{(500, 375): 1203,
 (500, 333): 352,
 (375, 500): 272,
 (333, 500): 101,
 (500, 334): 67,
 (500, 332): 55,
 (500, 374): 21,
 (500, 335): 19,
 (500, 338): 18,
 (334, 500): 17,
 (500, 400): 14,
 (500, 331): 14,
 (480, 360): 14,
 (332, 500): 13,
 (500, 281): 12,
 (500, 336): 10,
 (500, 377): 10,
 (500, 371): 9,
 (500, 357): 9,
 (500, 326): 9,
 (500, 337): 8,
 (500, 376): 8,
 (500, 500): 8,
 (400, 500): 7,
 (374, 500): 6,
 (335, 447): 6,
 (320, 240): 6,
 (338, 500): 5,
 (328, 500): 5,
 (320, 213): 5,
 (320, 480): 5,
 (448, 336): 5,
 (500, 340): 5,
 (400, 300): 5,
 (500, 378): 5,
 (377, 500): 5,
 (320, 426): 4,
 (360, 480): 4,
 (500, 369): 4,
 (500, 389): 4,
 (500, 329): 4,
 (500, 366): 4,
 (500, 322): 4,
 (335, 500): 4,
 (500, 328): 4,
 (331, 500): 4,
 (357, 500): 4,
 (500, 346): 4,
 (500, 398): 4,
 (337, 500): 4,
 (500, 382): 4,
 (500, 341): 4,
 (500, 358): 4,
 (500, 368): 4,
 (447, 335): 3,
 (367, 500): 3,
 (415, 500): 3,
 (500, 324): 3,
 (500, 345): 3,
 (500, 343): 3,
 (378, 500): 3,
 (

All images must have the same size before we can start training the model.

In [4]:
def dataset_max_image_size(image_dir, mode='train'):
    """Find the largest image height and width within a dataset split.

    Image ids are read, one per line, from ``Pascal-part/<mode>_id.txt`` and
    each ``<image_dir>/<id>.jpg`` is opened to read its dimensions. The two
    maxima are tracked independently, so they may come from different images.

    Args:
        image_dir: Directory containing the ``.jpg`` images.
        mode: Split name used to build the id-list file name.

    Returns:
        Tuple ``(max_height, max_width)``; ``(0, 0)`` when the id list
        contains no ids.
    """
    max_height, max_width = 0, 0
    dataset = 'Pascal-part/' + mode + '_id.txt'
    with open(dataset, 'r') as file:
        # Iterate the file lazily instead of loading it with readlines().
        for line in file:
            image_id = line.strip()
            if not image_id:
                # A blank line would otherwise resolve to '<image_dir>/.jpg'.
                continue
            image_path = os.path.join(image_dir, image_id + '.jpg')
            with Image.open(image_path) as image:
                max_height = max(max_height, image.height)
                max_width = max(max_width, image.width)
    return max_height, max_width

# Largest (height, width) found among the images of the default 'train' split.
dataset_max_image_size(image_dir)

(500, 500)

I will zero-pad every image to (500, 500, 3) and every mask to (500, 500). The function below pads a NumPy array to the target size, keeping the original content centred:

In [5]:
def padding(arr, target_size=(500, 500, 3)):
    """Zero-pad the first two axes of ``arr`` so it is centred in ``target_size``.

    The missing rows/columns are split between both sides; when the amount is
    odd, the extra row/column goes to the bottom/right.

    Args:
        arr: Array to pad. It must be 3-D when ``target_size`` has three
            entries and 2-D otherwise.
        target_size: ``(rows, cols)`` or ``(rows, cols, channels)``. Only the
            first two entries are used as sizes; a third entry merely selects
            the 3-D layout, whose last axis is left unpadded.

    Returns:
        A new array whose first two axes have the requested sizes, filled
        with zeros around the original content.

    Raises:
        ValueError: If the rank of ``arr`` does not match ``target_size`` or
            ``arr`` is larger than the target along either padded axis.
    """
    expected_ndim = 3 if len(target_size) == 3 else 2
    if np.ndim(arr) != expected_ndim:
        raise ValueError(
            'expected a {}-D array for target size {}, got shape {}'.format(
                expected_ndim, tuple(target_size), np.shape(arr)))

    missing_rows = target_size[0] - arr.shape[0]
    missing_cols = target_size[1] - arr.shape[1]
    if missing_rows < 0 or missing_cols < 0:
        # np.pad would reject the negative widths with a far less helpful message.
        raise ValueError(
            'array of shape {} exceeds target size {}'.format(
                arr.shape, tuple(target_size)))

    pad_width = [
        (missing_rows // 2, missing_rows - missing_rows // 2),
        (missing_cols // 2, missing_cols - missing_cols // 2),
    ]
    if expected_ndim == 3:
        # Never pad the trailing (channel) axis.
        pad_width.append((0, 0))
    return np.pad(arr, pad_width, 'constant')

Now we can pad each JPEG image and its corresponding mask:

In [1]:
def dataset_padding(image_dir, mask_dir, mode='train', target_size=None):
    """Load a dataset split and zero-pad every image and mask to one size.

    Image ids are read, one per line, from ``Pascal-part/<mode>_id.txt``.
    For each id, ``<image_dir>/<id>.jpg`` is loaded as an RGB array and
    ``<mask_dir>/<id>.npy`` is loaded with ``np.load``; both are padded with
    ``padding``. Masks are returned as loaded (no one-hot encoding) and are
    expected to be 2-D, since they are padded with a two-entry target size.

    Args:
        image_dir: Directory containing the ``.jpg`` images.
        mask_dir: Directory containing the ``.npy`` masks.
        mode: Split name used to build the id-list file name.
        target_size: Optional ``(height, width)`` to pad to. When ``None``
            (the default) the largest height and width of this split are
            used, as returned by ``dataset_max_image_size``. Pass the same
            value for every split to obtain identically shaped arrays.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: the stacked padded images and the
        stacked padded masks, in id-list order. Both are empty arrays when
        the id list contains no ids.
    """
    if target_size is None:
        h, w = dataset_max_image_size(image_dir, mode)
    else:
        h, w = target_size[0], target_size[1]

    dataset = 'Pascal-part/' + mode + '_id.txt'
    X, y = [], []
    with open(dataset, 'r') as file:
        for line in file:
            image_id = line.strip()
            if not image_id:
                # A blank line would otherwise resolve to '<image_dir>/.jpg'.
                continue
            image_path = os.path.join(image_dir, image_id + '.jpg')
            mask_path = os.path.join(mask_dir, image_id + '.npy')

            with Image.open(image_path) as image:
                # Force three channels so grayscale/palette/RGBA files still
                # match the (h, w, 3) padding target and stack cleanly.
                img = np.array(image.convert('RGB'))
            # The image file is closed before the mask is touched.
            X.append(padding(img, (h, w, 3)))

            mask = np.load(mask_path)
            y.append(padding(mask, (h, w)))

    return np.array(X), np.array(y)


In [None]:
# Load and pad both splits. Each call derives its padding size from the largest
# image of its own split, so the train and val arrays only share a spatial
# shape when those per-split maxima coincide.
X_train, y_train = dataset_padding(image_dir, mask_dir, mode='train')
X_val, y_val = dataset_padding(image_dir, mask_dir, mode='val')

Now the images and masks all have the same size, and we can train the model.

### Building a Model

I will use Keras, although PyTorch or TensorFlow could be used just as well to build and train the model.

Importing libraries

In [2]:
from keras import Input, Model
from keras.layers import *
from keras import backend as be
from keras.optimizers import *
from keras.losses import *
from preproccesing import dataset_padding
from keras.preprocessing.image import ImageDataGenerator

: 

: 