Постройте сегментацию изображений облаков типа Fish, используя сети Unet, PSPNet или FPN. В качестве базовых сетей можно использовать ResNet, MobileNet, DenseNet или любые другие подходящие. Можно использовать обученные модели сетей (входные размеры 384х256).

Постройте ансамбль предсказаний, выбирая среднее значение из нескольких. Выгрузите результаты предсказания в требуемом формате (sample_submission.csv).

Данные:
* video.ittensive.com/machine-learning/clouds/train.csv.gz (54 Мб)
* video.ittensive.com/machine-learning/clouds/train_images_small.tar.gz (212 Мб)
* video.ittensive.com/machine-learning/clouds/test_images_small.tar.gz (142 Мб)
* video.ittensive.com/machine-learning/clouds/sample_submission.csv.gz

Модели:
* video.ittensive.com/machine-learning/clouds/unet.fish.h5
* video.ittensive.com/machine-learning/clouds/fpn.fish.h5
* video.ittensive.com/machine-learning/clouds/pspnet.fish.h5

Итоговый файл с кодом (.py или .ipynb) выложите в github с портфолио.

In [5]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from imgaug.augmentables.segmaps import SegmentationMapsOnImage
#import segmentation_models as sm
import numpy as np
import pandas as pd
import keras
import segmentation_models as sm
from sklearn.model_selection import train_test_split
from keras.preprocessing import image
#import keras.utils as image
from keras.models import Model,load_model
from keras.layers import Input, concatenate, ZeroPadding2D
from keras.layers import Conv2D, MaxPooling2D, Conv2DTranspose
from keras import optimizers
from keras import backend as K
from keras.callbacks import EarlyStopping, ModelCheckpoint
import os
import sys
# NOTE(review): this assignment runs after `import keras` above, and the cell
# output below still reports "Using TensorFlow backend." — if PlaidML is the
# intended backend, the variable presumably has to be set before keras is
# imported; confirm.
os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Segmentation Models: using `keras` framework.


In [None]:
# Number of samples per training batch and the folder holding the training images.
batch_size = 15
filesDir = "train_images_small"

# Network input geometry: width, height and channel count.
# Other candidate sizes noted in the original cell: 480x320 and 525x350.
image_x, image_y, image_ch = 384, 256, 3

# Target masks use the same width and height as the input images.
mask_x, mask_y = image_x, image_y
def mask_rate(a, x, y):
    """Map a flat pixel index of the source layout to an index in the x-by-y mask.

    a: flat index (scalar or NumPy integer); `a // 1400` and `a % 1400` are
       used as the two source coordinates.
    x, y: width and height of the reduced mask.
    Returns: the rescaled flat index as a NumPy uint32.

    The constants 1400 and 2100 are presumably the height and width of the
    original images -- TODO confirm against the dataset.
    NOTE(review): the first coordinate is multiplied by `x`, so the result
    can reach x*(x-1) + (y-1), which is larger than x*y whenever x > y.
    Verify the stride is the intended one.
    """
    # The float offset keeps the first term in floating point, as in the
    # original formulation.
    major = a // 1400 + 0.0
    scaled_major = major * x // 2100
    scaled_minor = y * (a % 1400) // 1400
    flat_index = x * scaled_major + scaled_minor
    return np.round(flat_index).astype("uint32")

def calc_mask(px, x=image_x, y=image_y):
    """Decode a run-length string into a binary mask at the reduced size.

    px: whitespace-separated integers read as consecutive (start, length)
        pairs. An empty string yields an all-zero mask.
    x, y: mask width and height; default to the module-level image size.
    Returns: uint8 array of shape (x, y) with 1 inside the decoded runs.
    Raises: ValueError if a token is not an integer or the token count is odd.
    """
    # split() without an argument tolerates repeated or trailing whitespace.
    runs = np.array([int(n) for n in px.split()]).reshape(-1, 2)
    mask = np.zeros(x*y, dtype='uint8')
    for start, length in runs:
        # mask_rate returns an unsigned NumPy integer. Converting to int and
        # clamping at 0 keeps a scaled start of 0 from turning into -1 (or
        # wrapping around), which made the slice skip the leading pixels.
        first = max(int(mask_rate(start, x, y)) - 1, 0)
        last = int(mask_rate(length + start, x, y))
        mask[first:last] = 1
    return mask.reshape(y, x).transpose()

def calc_dice(x):
    """Score one row by comparing its encoded mask with its target pixels.

    x: mapping (e.g. a DataFrame row) with the keys "EncodedPixels",
       "target" and, when a mask is present, "TargetPixels"
       (space-separated integers).
    Returns: 1 when there is no mask and target is 0; the Dice ratio when a
    mask is present and target is 1; 0 in every other case.
    """
    encoded = x["EncodedPixels"]
    label = x["target"]

    # A missing mask is NaN, the only value that is not equal to itself.
    if encoded != encoded and label == 0:
        return 1
    if not (encoded == encoded and label == 1):
        return 0

    predicted = calc_mask(encoded).flatten()
    expected = np.array(x["TargetPixels"].split(" ")).astype("int8")
    overlap = np.sum(expected[predicted == 1])
    return 0 + 2*overlap/(np.sum(expected)+np.sum(predicted))

def load_y(df):
    """Build the target mask tensor for every row of `df`.

    df: frame with an "EncodedPixels" column; NaN marks a row without a mask.
    Returns: array of shape (len(df), mask_y, mask_x, 1).
    """
    blank_size = mask_x*mask_y
    masks = [
        # NaN != NaN, so the comparison is False only for missing masks.
        calc_mask(encoded, mask_x, mask_y).transpose().flatten()
        if encoded == encoded
        else np.zeros(blank_size, dtype="i1")
        for encoded in df["EncodedPixels"]
    ]
    return np.array(masks).reshape(len(df), mask_y, mask_x, 1)

def load_x(df):
    """Load and resize the image files listed in `df` into one input tensor.

    df: frame with an "Image" column of file names relative to `filesDir`.
    Returns: array of shape (len(df), image_y, image_x, image_ch).
    """
    frames = []
    for file_name in df["Image"]:
        picture = image.load_img(os.path.join(filesDir, file_name),
                                 target_size=(image_y, image_x))
        pixels = image.img_to_array(picture)
        frames.append(np.expand_dims(pixels, axis=0))
    return np.array(frames).reshape(len(df), image_y, image_x, image_ch)

def load_data(df, batch_size):
    """Endlessly yield (images, masks) batches cut from `df` in row order.

    df: frame accepted by load_x and load_y.
    batch_size: number of rows per batch; the final batch of each pass may
        be shorter.
    Yields: tuples (load_x(batch), load_y(batch)); after the last row the
        generator starts again from the first one.
    Raises: ValueError on the first iteration if `batch_size` is not
        positive or `df` has no rows. Previously both cases looped forever
        without producing a usable batch.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")
    if len(df) == 0:
        raise ValueError("df must contain at least one row")
    while True:
        for batch_start in range(0, len(df), batch_size):
            # Slicing past the end is clipped, so the tail batch is shorter.
            batch = df[batch_start:batch_start + batch_size]
            yield (load_x(batch), load_y(batch))

def draw_prediction(prediction):
    """Show a histogram of the first element of `prediction`, titled "Fish"."""
    figure, axes = plt.subplots(nrows=1, ncols=1, figsize=(16, 8))
    axes.hist(prediction[0])
    axes.set_title("Fish")
    plt.show()


## Загрузка данных

In [9]:
# Read the training annotations (train.csv.gz) from the course server into a
# DataFrame; the next cell relies on its "Image_Label" column.
data = pd.read_csv('https://video.ittensive.com/machine-learning/clouds/train.csv.gz')

In [10]:
# "Image_Label" holds "<file name>_<cloud type>"; split it once and keep both parts.
label_parts = data["Image_Label"].str.split("_")
data["Image"] = label_parts.str[0]
data["Label"] = label_parts.str[1]
data["target"] = 0
data = data.drop(columns=["Image_Label"])

# Only the Fish cloud type is segmented in this notebook.
is_fish = data["Label"] == "Fish"
data = data.loc[is_fish]
print (data.head())

                                        EncodedPixels        Image Label  \
0   264918 937 266318 937 267718 937 269118 937 27...  0011165.jpg  Fish   
4   233813 878 235213 878 236613 878 238010 881 23...  002be4f.jpg  Fish   
8   3510 690 4910 690 6310 690 7710 690 9110 690 1...  0031ae9.jpg  Fish   
12                                                NaN  0035239.jpg  Fish   
16  2367966 18 2367985 2 2367993 8 2368002 62 2369...  003994e.jpg  Fish   

    target  
0        0  
4        0  
8        0  
12       0  
16       0  


In [11]:
# Hold out 20% of the rows for testing. The seed is fixed so that the split,
# and every score computed from it, is the same on each run.
train, test = train_test_split(data, test_size=0.2, random_state=42)
train = pd.DataFrame(train)
test = pd.DataFrame(test)
# The full frame is no longer needed once it has been split.
del data
print (train.head())

                                           EncodedPixels        Image Label  \
4724   69099 810 70499 810 71899 810 73299 810 74699 ...  3671c62.jpg  Fish   
11128                                                NaN  7e820d6.jpg  Fish   
5740                                                 NaN  4203100.jpg  Fish   
1380                                                 NaN  0ff2c12.jpg  Fish   
8368                                                 NaN  5f58f6d.jpg  Fish   

       target  
4724        0  
11128       0  
5740        0  
1380        0  
8368        0  


# U-Net

In [7]:
def dice_coef(y_true, y_pred):
    """Return the smoothed Dice coefficient of two tensors.

    Both tensors are flattened; the result is
    (2 * sum(true * pred) + 1) / (sum(true) + sum(pred) + 1), where the
    added 1 keeps the ratio defined when both sums are zero.
    """
    smooth = 1
    truth = K.flatten(y_true)
    guess = K.flatten(y_pred)
    overlap = K.sum(truth * guess)
    total = K.sum(truth) + K.sum(guess)
    return (2. * overlap + smooth) / (total + smooth)

def dice_coef_loss(y_true, y_pred):
    """Return 1 minus the Dice coefficient, so a larger overlap gives a smaller loss."""
    overlap_score = dice_coef(y_true, y_pred)
    return 1 - overlap_score

In [8]:
def _conv_pair(tensor, filters):
    """Apply two successive 3x3 same-padded ReLU convolutions with `filters` channels."""
    for _ in range(2):
        tensor = Conv2D(filters, (3, 3), activation='relu', padding='same',
                        kernel_initializer='glorot_uniform')(tensor)
    return tensor


def _upsample_and_merge(tensor, skip, filters):
    """Upsample `tensor` with a stride-2 transposed convolution and concatenate `skip` on axis 3."""
    upsampled = Conv2DTranspose(filters, (2, 2), strides=(2, 2),
                                padding='same')(tensor)
    return concatenate([upsampled, skip], axis=3)


# Layers are created in the same order as a hand-unrolled definition, so the
# automatically generated layer names do not change.
inputs = Input((image_y, image_x, image_ch))

# Contracting path: each stage doubles the filter count and halves the resolution.
conv1 = _conv_pair(inputs, 32)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

conv2 = _conv_pair(pool1, 64)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

conv3 = _conv_pair(pool2, 128)
pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

conv4 = _conv_pair(pool3, 256)
pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)

# Bottleneck.
conv5 = _conv_pair(pool4, 512)

# Expanding path: upsample, merge with the matching encoder stage, convolve.
up6 = _upsample_and_merge(conv5, conv4, 256)
conv6 = _conv_pair(up6, 256)

up7 = _upsample_and_merge(conv6, conv3, 128)
conv7 = _conv_pair(up7, 128)

up8 = _upsample_and_merge(conv7, conv2, 64)
conv8 = _conv_pair(up8, 64)

up9 = _upsample_and_merge(conv8, conv1, 32)
conv9 = _conv_pair(up9, 32)

# Single-channel sigmoid output: one value per pixel.
conv10 = Conv2D(1, (1, 1), activation='sigmoid')(conv9)

model = Model(inputs=[inputs], outputs=[conv10])







In [9]:
# Train the U-Net with Nadam on the Dice loss; mean absolute error is reported alongside.
unet_optimizer = optimizers.Nadam(lr=1e-5)
model.compile(optimizer=unet_optimizer, loss=dice_coef_loss, metrics=["mae"])
model.summary()


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 256, 384, 3)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 256, 384, 32) 896         input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 256, 384, 32) 9248        conv2d_1[0][0]                   
__________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D)  (None, 128, 192, 32) 0           conv2d_2[0][0]                   
__________________________________________________________________________________________________
conv2d_3 

In [10]:
# Whole batches per epoch; a trailing partial batch is not counted in the step total.
unet_steps = len(train)//batch_size
model.fit_generator(load_data(train, batch_size),
            steps_per_epoch=unet_steps, epochs=5)



Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x201d2db85f8>

## Предсказание U-Net

In [11]:
# Predict the held-out rows one image per step, so `steps` equals the row count.
unet_test_batches = load_data(test, 1)
prediction_unet = model.predict_generator(unet_test_batches,
                            steps=len(test), verbose=1)



# FPN

In [6]:
# FPN on a ResNet-50 encoder with ImageNet weights; the encoder is frozen and
# the head emits one sigmoid channel.
BACKBONE = 'resnet50'
# NOTE(review): preprocess_input is obtained here but is not applied anywhere
# in the code visible in this notebook -- confirm whether the inputs should
# pass through it.
preprocess_input = sm.get_preprocessing(BACKBONE)
fpn_options = dict(encoder_weights='imagenet', encoder_freeze=True,
                   classes=1, activation="sigmoid")
model_fpn = sm.FPN(BACKBONE, **fpn_options)












In [7]:
# Train the FPN with Nadam on the library's Dice loss; IoU is reported alongside.
fpn_optimizer = optimizers.Nadam(lr=0.03)
model_fpn.compile(fpn_optimizer,
             loss=sm.losses.dice_loss, metrics=[sm.metrics.iou_score])
model_fpn.summary()


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
data (InputLayer)               (None, None, None, 3 0                                            
__________________________________________________________________________________________________
bn_data (BatchNormalization)    (None, None, None, 3 9           data[0][0]                       
__________________________________________________________________________________________________
zero_padding2d_1 (ZeroPadding2D (None, None, None, 3 0           bn_data[0][0]                    
__________________________________________________________________________________________________
conv0 (Conv2D)                  (None, None, None, 6 9408        zero_padding2d_1[0][0]           
__________________________________________________________________________________________________
bn0 (Batc

In [None]:
# Write the model to clouds.h5 during training.
# NOTE(review): the checkpoint monitors 'val_loss', but no validation data is
# passed to this call -- confirm the monitored metric is the intended one.
fpn_checkpoint = ModelCheckpoint("clouds.h5", mode='auto', monitor='val_loss')
fpn_steps = len(train)//batch_size
model_fpn.fit_generator(load_data(train, batch_size),
                   steps_per_epoch=fpn_steps, epochs=5,
                   callbacks=[fpn_checkpoint])

Epoch 1/5

In [None]:
# Predict the held-out rows one image per step, so `steps` equals the row count.
fpn_test_batches = load_data(test, 1)
prediction_fpn = model_fpn.predict_generator(fpn_test_batches,
                                    steps=len(test), verbose=1)

In [None]:
# First predicted image as a 2-D (image_y, image_x) array. The sigmoid output
# stays floating point: casting it to uint8 would floor every value below 1
# to 0 and erase the prediction.
pred_fpn = prediction_fpn[0].reshape(image_y, image_x)

## Усреднение предсказаний

In [None]:
def prep_pred(p):
    """Min-max scale a 2-D prediction column by column and return it transposed.

    p: 2-D array-like; each column is rescaled independently to [0, 1].
    Returns: the scaled array with its two axes swapped.
    Raises: ValueError (from scikit-learn) if `p` is not 2-D.
    """
    # Imported locally because the notebook's import cell does not bring
    # MinMaxScaler into scope, so calling it would raise NameError.
    from sklearn.preprocessing import MinMaxScaler

    return np.transpose(MinMaxScaler().fit_transform(p))

In [None]:
# The scaler inside prep_pred accepts 2-D input only, while the raw U-Net
# output has one (image_y, image_x, 1) slice per predicted image. Take the
# first image as a 2-D array, mirroring how the FPN prediction is reduced.
prediction_unet = prep_pred(prediction_unet[0].reshape(image_y, image_x))

In [None]:
# Scale the 2-D FPN prediction with prep_pred; the result replaces the raw
# predict_generator output stored under the same name.
prediction_fpn = prep_pred(pred_fpn)

In [None]:
# Ensemble by taking the mean of the two model outputs, as the task statement
# asks, rather than their sum, so the result stays on the same scale as each
# individual prediction.
prediction = (prediction_unet + prediction_fpn) / 2
draw_prediction(prediction)